liuyuan-pal committed on
Commit
d0f39be
1 Parent(s): df62e57

add models

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ ckpt/* filter=lfs diff=lfs merge=lfs -text
blender_script.py DELETED
@@ -1,282 +0,0 @@
1
- """Blender script to render images of 3D models.
2
-
3
- This script is used to render images of 3D models. It takes in a list of paths
4
- to .glb files and renders images of each model. The images are from rotating the
5
- object around the origin. The images are saved to the output directory.
6
-
7
- Example usage:
8
- blender -b -P blender_script.py -- \
9
- --object_path my_object.glb \
10
- --output_dir ./views \
11
- --engine CYCLES \
12
- --scale 0.8 \
13
- --num_images 12 \
14
- --camera_dist 1.2
15
-
16
- Here, input_model_paths.json is a json file containing a list of paths to .glb.
17
- """
18
-
19
- import argparse
20
- import json
21
- import math
22
- import os
23
- import random
24
- import sys
25
- import time
26
- import urllib.request
27
- from pathlib import Path
28
-
29
- from mathutils import Vector, Matrix
30
- import numpy as np
31
-
32
- import bpy
33
- from mathutils import Vector
34
- import pickle
35
-
36
- def read_pickle(pkl_path):
37
- with open(pkl_path, 'rb') as f:
38
- return pickle.load(f)
39
-
40
- def save_pickle(data, pkl_path):
41
- # os.system('mkdir -p {}'.format(os.path.dirname(pkl_path)))
42
- with open(pkl_path, 'wb') as f:
43
- pickle.dump(data, f)
44
-
45
- parser = argparse.ArgumentParser()
46
- parser.add_argument("--object_path", type=str, required=True)
47
- parser.add_argument("--output_dir", type=str, required=True)
48
- parser.add_argument("--engine", type=str, default="CYCLES", choices=["CYCLES", "BLENDER_EEVEE"])
49
- parser.add_argument("--camera_type", type=str, default='even')
50
- parser.add_argument("--num_images", type=int, default=16)
51
- parser.add_argument("--elevation", type=float, default=30)
52
- parser.add_argument("--elevation_start", type=float, default=-10)
53
- parser.add_argument("--elevation_end", type=float, default=40)
54
- parser.add_argument("--device", type=str, default='CUDA')
55
-
56
- argv = sys.argv[sys.argv.index("--") + 1 :]
57
- args = parser.parse_args(argv)
58
-
59
- print('===================', args.engine, '===================')
60
-
61
- context = bpy.context
62
- scene = context.scene
63
- render = scene.render
64
-
65
- cam = scene.objects["Camera"]
66
- cam.location = (0, 1.2, 0)
67
- cam.data.lens = 35
68
- cam.data.sensor_width = 32
69
-
70
- cam_constraint = cam.constraints.new(type="TRACK_TO")
71
- cam_constraint.track_axis = "TRACK_NEGATIVE_Z"
72
- cam_constraint.up_axis = "UP_Y"
73
-
74
- render.engine = args.engine
75
- render.image_settings.file_format = "PNG"
76
- render.image_settings.color_mode = "RGBA"
77
- render.resolution_x = 256
78
- render.resolution_y = 256
79
- render.resolution_percentage = 100
80
-
81
- scene.cycles.device = "GPU"
82
- scene.cycles.samples = 128
83
- scene.cycles.diffuse_bounces = 1
84
- scene.cycles.glossy_bounces = 1
85
- scene.cycles.transparent_max_bounces = 3
86
- scene.cycles.transmission_bounces = 3
87
- scene.cycles.filter_width = 0.01
88
- scene.cycles.use_denoising = True
89
- scene.render.film_transparent = True
90
-
91
- bpy.context.preferences.addons["cycles"].preferences.get_devices()
92
- # Set the device_type
93
- bpy.context.preferences.addons["cycles"].preferences.compute_device_type = args.device # or "OPENCL"
94
- bpy.context.scene.cycles.tile_size = 8192
95
-
96
-
97
- def az_el_to_points(azimuths, elevations):
98
- x = np.cos(azimuths)*np.cos(elevations)
99
- y = np.sin(azimuths)*np.cos(elevations)
100
- z = np.sin(elevations)
101
- return np.stack([x,y,z],-1) #
102
-
103
- def set_camera_location(cam_pt):
104
- # from https://blender.stackexchange.com/questions/18530/
105
- x, y, z = cam_pt # sample_spherical(radius_min=1.5, radius_max=2.2, maxz=2.2, minz=-2.2)
106
- camera = bpy.data.objects["Camera"]
107
- camera.location = x, y, z
108
-
109
- return camera
110
-
111
- def get_calibration_matrix_K_from_blender(camera):
112
- f_in_mm = camera.data.lens
113
- scene = bpy.context.scene
114
- resolution_x_in_px = scene.render.resolution_x
115
- resolution_y_in_px = scene.render.resolution_y
116
- scale = scene.render.resolution_percentage / 100
117
- sensor_width_in_mm = camera.data.sensor_width
118
- sensor_height_in_mm = camera.data.sensor_height
119
- pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
120
-
121
- if camera.data.sensor_fit == 'VERTICAL':
122
- # the sensor height is fixed (sensor fit is horizontal),
123
- # the sensor width is effectively changed with the pixel aspect ratio
124
- s_u = resolution_x_in_px * scale / sensor_width_in_mm / pixel_aspect_ratio
125
- s_v = resolution_y_in_px * scale / sensor_height_in_mm
126
- else: # 'HORIZONTAL' and 'AUTO'
127
- # the sensor width is fixed (sensor fit is horizontal),
128
- # the sensor height is effectively changed with the pixel aspect ratio
129
- s_u = resolution_x_in_px * scale / sensor_width_in_mm
130
- s_v = resolution_y_in_px * scale * pixel_aspect_ratio / sensor_height_in_mm
131
-
132
- # Parameters of intrinsic calibration matrix K
133
- alpha_u = f_in_mm * s_u
134
- alpha_v = f_in_mm * s_u
135
- u_0 = resolution_x_in_px * scale / 2
136
- v_0 = resolution_y_in_px * scale / 2
137
- skew = 0 # only use rectangular pixels
138
-
139
- K = np.asarray(((alpha_u, skew, u_0),
140
- (0, alpha_v, v_0),
141
- (0, 0, 1)),np.float32)
142
- return K
143
-
144
-
145
- def reset_scene() -> None:
146
- """Resets the scene to a clean state."""
147
- # delete everything that isn't part of a camera or a light
148
- for obj in bpy.data.objects:
149
- if obj.type not in {"CAMERA", "LIGHT"}:
150
- bpy.data.objects.remove(obj, do_unlink=True)
151
- # delete all the materials
152
- for material in bpy.data.materials:
153
- bpy.data.materials.remove(material, do_unlink=True)
154
- # delete all the textures
155
- for texture in bpy.data.textures:
156
- bpy.data.textures.remove(texture, do_unlink=True)
157
- # delete all the images
158
- for image in bpy.data.images:
159
- bpy.data.images.remove(image, do_unlink=True)
160
-
161
-
162
- # load the glb model
163
- def load_object(object_path: str) -> None:
164
- """Loads a glb model into the scene."""
165
- if object_path.endswith(".glb"):
166
- bpy.ops.import_scene.gltf(filepath=object_path, merge_vertices=True)
167
- elif object_path.endswith(".fbx"):
168
- bpy.ops.import_scene.fbx(filepath=object_path)
169
- else:
170
- raise ValueError(f"Unsupported file type: {object_path}")
171
-
172
-
173
- def scene_bbox(single_obj=None, ignore_matrix=False):
174
- bbox_min = (math.inf,) * 3
175
- bbox_max = (-math.inf,) * 3
176
- found = False
177
- for obj in scene_meshes() if single_obj is None else [single_obj]:
178
- found = True
179
- for coord in obj.bound_box:
180
- coord = Vector(coord)
181
- if not ignore_matrix:
182
- coord = obj.matrix_world @ coord
183
- bbox_min = tuple(min(x, y) for x, y in zip(bbox_min, coord))
184
- bbox_max = tuple(max(x, y) for x, y in zip(bbox_max, coord))
185
- if not found:
186
- raise RuntimeError("no objects in scene to compute bounding box for")
187
- return Vector(bbox_min), Vector(bbox_max)
188
-
189
-
190
- def scene_root_objects():
191
- for obj in bpy.context.scene.objects.values():
192
- if not obj.parent:
193
- yield obj
194
-
195
-
196
- def scene_meshes():
197
- for obj in bpy.context.scene.objects.values():
198
- if isinstance(obj.data, (bpy.types.Mesh)):
199
- yield obj
200
-
201
- # function from https://github.com/panmari/stanford-shapenet-renderer/blob/master/render_blender.py
202
- def get_3x4_RT_matrix_from_blender(cam):
203
- bpy.context.view_layer.update()
204
- location, rotation = cam.matrix_world.decompose()[0:2]
205
- R = np.asarray(rotation.to_matrix())
206
- t = np.asarray(location)
207
-
208
- cam_rec = np.asarray([[1, 0, 0], [0, -1, 0], [0, 0, -1]], np.float32)
209
- R = R.T
210
- t = -R @ t
211
- R_world2cv = cam_rec @ R
212
- t_world2cv = cam_rec @ t
213
-
214
- RT = np.concatenate([R_world2cv,t_world2cv[:,None]],1)
215
- return RT
216
-
217
- def normalize_scene():
218
- bbox_min, bbox_max = scene_bbox()
219
- scale = 1 / max(bbox_max - bbox_min)
220
- for obj in scene_root_objects():
221
- obj.scale = obj.scale * scale
222
- # Apply scale to matrix_world.
223
- bpy.context.view_layer.update()
224
- bbox_min, bbox_max = scene_bbox()
225
- offset = -(bbox_min + bbox_max) / 2
226
- for obj in scene_root_objects():
227
- obj.matrix_world.translation += offset
228
- bpy.ops.object.select_all(action="DESELECT")
229
-
230
- def save_images(object_file: str) -> None:
231
- object_uid = os.path.basename(object_file).split(".")[0]
232
- os.makedirs(args.output_dir, exist_ok=True)
233
-
234
- reset_scene()
235
- # load the object
236
- load_object(object_file)
237
- # object_uid = os.path.basename(object_file).split(".")[0]
238
- normalize_scene()
239
-
240
- # create an empty object to track
241
- empty = bpy.data.objects.new("Empty", None)
242
- scene.collection.objects.link(empty)
243
- cam_constraint.target = empty
244
-
245
- world_tree = bpy.context.scene.world.node_tree
246
- back_node = world_tree.nodes['Background']
247
- env_light = 0.5
248
- back_node.inputs['Color'].default_value = Vector([env_light, env_light, env_light, 1.0])
249
- back_node.inputs['Strength'].default_value = 1.0
250
-
251
- distances = np.asarray([1.5 for _ in range(args.num_images)])
252
- if args.camera_type=='fixed':
253
- azimuths = (np.arange(args.num_images)/args.num_images*np.pi*2).astype(np.float32)
254
- elevations = np.deg2rad(np.asarray([args.elevation] * args.num_images).astype(np.float32))
255
- elif args.camera_type=='random':
256
- azimuths = (np.arange(args.num_images) / args.num_images * np.pi * 2).astype(np.float32)
257
- elevations = np.random.uniform(args.elevation_start, args.elevation_end, args.num_images)
258
- elevations = np.deg2rad(elevations)
259
- else:
260
- raise NotImplementedError
261
-
262
- cam_pts = az_el_to_points(azimuths, elevations) * distances[:,None]
263
- cam_poses = []
264
- (Path(args.output_dir) / object_uid).mkdir(exist_ok=True, parents=True)
265
- for i in range(args.num_images):
266
- # set camera
267
- camera = set_camera_location(cam_pts[i])
268
- RT = get_3x4_RT_matrix_from_blender(camera)
269
- cam_poses.append(RT)
270
-
271
- render_path = os.path.join(args.output_dir, object_uid, f"{i:03d}.png")
272
- if os.path.exists(render_path): continue
273
- scene.render.filepath = os.path.abspath(render_path)
274
- bpy.ops.render.render(write_still=True)
275
-
276
- if args.camera_type=='random':
277
- K = get_calibration_matrix_K_from_blender(camera)
278
- cam_poses = np.stack(cam_poses, 0)
279
- save_pickle([K, azimuths, elevations, distances, cam_poses], os.path.join(args.output_dir, object_uid, "meta.pkl"))
280
-
281
- if __name__ == "__main__":
282
- save_images(args.object_path)
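Note: when --camera_type random is used, the script above writes the camera metadata [K, azimuths, elevations, distances, cam_poses] to meta.pkl next to the rendered views. A minimal sketch of reading that file back; the output path is a placeholder:

import pickle

def read_meta(pkl_path):
    # meta.pkl stores [K, azimuths, elevations, distances, cam_poses]; see the save_pickle call above
    with open(pkl_path, 'rb') as f:
        K, azimuths, elevations, distances, cam_poses = pickle.load(f)
    # K: 3x3 intrinsics; cam_poses: [num_images, 3, 4] world-to-camera matrices (OpenCV convention)
    return K, azimuths, elevations, distances, cam_poses

K, azimuths, elevations, distances, cam_poses = read_meta('./views/my_object/meta.pkl')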
ckpt/ViT-L-14.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836
+ size 932768134
ckpt/syncdreamer-pretrain.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ebb31334d9e4002b2590dd805e25238beaf95fa082f6e39a132344624448dcb
+ size 5570034171
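Both checkpoints added here are Git LFS pointer files, so a plain clone only contains the three-line stubs above; `git lfs pull` (or huggingface_hub) fetches the real weights. A minimal download sketch, where the repo_id is an assumed placeholder for this model repository:

from huggingface_hub import hf_hub_download

# repo_id below is an assumption -- substitute the actual repository id
ckpt_path = hf_hub_download(repo_id="liuyuan-pal/SyncDreamer", filename="ckpt/syncdreamer-pretrain.ckpt")
clip_path = hf_hub_download(repo_id="liuyuan-pal/SyncDreamer", filename="ckpt/ViT-L-14.pt")
print(ckpt_path, clip_path)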
foreground_segment.py DELETED
@@ -1,50 +0,0 @@
- import cv2
- import argparse
- import numpy as np
-
- import torch
- from PIL import Image
-
-
- class BackgroundRemoval:
-     def __init__(self, device='cuda'):
-         from carvekit.api.high import HiInterface
-         self.interface = HiInterface(
-             object_type="object", # Can be "object" or "hairs-like".
-             batch_size_seg=5,
-             batch_size_matting=1,
-             device=device,
-             seg_mask_size=640, # Use 640 for Tracer B7 and 320 for U2Net
-             matting_mask_size=2048,
-             trimap_prob_threshold=231,
-             trimap_dilation=30,
-             trimap_erosion_iters=5,
-             fp16=True,
-         )
-
-     @torch.no_grad()
-     def __call__(self, image):
-         # image: [H, W, 3] array in [0, 255].
-         image = Image.fromarray(image)
-         image = self.interface([image])[0]
-         image = np.array(image)
-         return image
-
- def process(image_path, mask_path):
-     mask_predictor = BackgroundRemoval()
-     image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
-     if image.shape[-1] == 4:
-         image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
-     else:
-         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-     rgba = mask_predictor(image) # [H, W, 4]
-     cv2.imwrite(mask_path, cv2.cvtColor(rgba, cv2.COLOR_RGBA2BGRA))
-
-
- if __name__ == '__main__':
-     parser = argparse.ArgumentParser()
-     parser.add_argument('--input', required=True, type=str)
-     parser.add_argument('--output', required=True, type=str)
-     opt = parser.parse_args()
-
-     process(opt.input, opt.output)
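A minimal sketch of how this removed helper is used, either via the CLI shown in __main__ above or directly from Python (requires carvekit and a CUDA device); the file names are placeholders:

# equivalent to: python foreground_segment.py --input input.png --output masked.png
from foreground_segment import process

process('input.png', 'masked.png')  # writes an RGBA image with the background removed to masked.png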
raymarching/__init__.py DELETED
@@ -1 +0,0 @@
- from .raymarching import *
raymarching/backend.py DELETED
@@ -1,40 +0,0 @@
- import os
- from torch.utils.cpp_extension import load
-
- _src_path = os.path.dirname(os.path.abspath(__file__))
-
- nvcc_flags = [
-     '-O3', '-std=c++14',
-     '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__',
- ]
-
- if os.name == "posix":
-     c_flags = ['-O3', '-std=c++14']
- elif os.name == "nt":
-     c_flags = ['/O2', '/std:c++17']
-
-     # find cl.exe
-     def find_cl_path():
-         import glob
-         for edition in ["Enterprise", "Professional", "BuildTools", "Community"]:
-             paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True)
-             if paths:
-                 return paths[0]
-
-     # If cl.exe is not on path, try to find it.
-     if os.system("where cl.exe >nul 2>nul") != 0:
-         cl_path = find_cl_path()
-         if cl_path is None:
-             raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation")
-         os.environ["PATH"] += ";" + cl_path
-
- _backend = load(name='_raymarching',
-                 extra_cflags=c_flags,
-                 extra_cuda_cflags=nvcc_flags,
-                 sources=[os.path.join(_src_path, 'src', f) for f in [
-                     'raymarching.cu',
-                     'bindings.cpp',
-                 ]],
-                 )
-
- __all__ = ['_backend']
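For context, this module JIT-compiles the CUDA extension with torch.utils.cpp_extension.load at import time, so the first import can take a few minutes while nvcc runs; later imports reuse the cached build. A minimal sketch, assuming the raymarching package directory is on the Python path and a CUDA toolchain is available:

import raymarching  # falls back to backend.py and JIT-builds _raymarching if no installed extension is found

print(raymarching.near_far_from_aabb)  # CUDA op exposed through raymarching/raymarching.py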
raymarching/raymarching.py DELETED
@@ -1,373 +0,0 @@
1
- import numpy as np
2
- import time
3
-
4
- import torch
5
- import torch.nn as nn
6
- from torch.autograd import Function
7
- from torch.cuda.amp import custom_bwd, custom_fwd
8
-
9
- try:
10
- import _raymarching as _backend
11
- except ImportError:
12
- from .backend import _backend
13
-
14
-
15
- # ----------------------------------------
16
- # utils
17
- # ----------------------------------------
18
-
19
- class _near_far_from_aabb(Function):
20
- @staticmethod
21
- @custom_fwd(cast_inputs=torch.float32)
22
- def forward(ctx, rays_o, rays_d, aabb, min_near=0.2):
23
- ''' near_far_from_aabb, CUDA implementation
24
- Calculate rays' intersection time (near and far) with aabb
25
- Args:
26
- rays_o: float, [N, 3]
27
- rays_d: float, [N, 3]
28
- aabb: float, [6], (xmin, ymin, zmin, xmax, ymax, zmax)
29
- min_near: float, scalar
30
- Returns:
31
- nears: float, [N]
32
- fars: float, [N]
33
- '''
34
- if not rays_o.is_cuda: rays_o = rays_o.cuda()
35
- if not rays_d.is_cuda: rays_d = rays_d.cuda()
36
-
37
- rays_o = rays_o.contiguous().view(-1, 3)
38
- rays_d = rays_d.contiguous().view(-1, 3)
39
-
40
- N = rays_o.shape[0] # num rays
41
-
42
- nears = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device)
43
- fars = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device)
44
-
45
- _backend.near_far_from_aabb(rays_o, rays_d, aabb, N, min_near, nears, fars)
46
-
47
- return nears, fars
48
-
49
- near_far_from_aabb = _near_far_from_aabb.apply
50
-
51
-
52
- class _sph_from_ray(Function):
53
- @staticmethod
54
- @custom_fwd(cast_inputs=torch.float32)
55
- def forward(ctx, rays_o, rays_d, radius):
56
- ''' sph_from_ray, CUDA implementation
57
- get spherical coordinate on the background sphere from rays.
58
- Assume rays_o are inside the Sphere(radius).
59
- Args:
60
- rays_o: [N, 3]
61
- rays_d: [N, 3]
62
- radius: scalar, float
63
- Return:
64
- coords: [N, 2], in [-1, 1], theta and phi on a sphere. (further-surface)
65
- '''
66
- if not rays_o.is_cuda: rays_o = rays_o.cuda()
67
- if not rays_d.is_cuda: rays_d = rays_d.cuda()
68
-
69
- rays_o = rays_o.contiguous().view(-1, 3)
70
- rays_d = rays_d.contiguous().view(-1, 3)
71
-
72
- N = rays_o.shape[0] # num rays
73
-
74
- coords = torch.empty(N, 2, dtype=rays_o.dtype, device=rays_o.device)
75
-
76
- _backend.sph_from_ray(rays_o, rays_d, radius, N, coords)
77
-
78
- return coords
79
-
80
- sph_from_ray = _sph_from_ray.apply
81
-
82
-
83
- class _morton3D(Function):
84
- @staticmethod
85
- def forward(ctx, coords):
86
- ''' morton3D, CUDA implementation
87
- Args:
88
- coords: [N, 3], int32, in [0, 128) (for some reason there is no uint32 tensor in torch...)
89
- TODO: check if the coord range is valid! (current 128 is safe)
90
- Returns:
91
- indices: [N], int32, in [0, 128^3)
92
-
93
- '''
94
- if not coords.is_cuda: coords = coords.cuda()
95
-
96
- N = coords.shape[0]
97
-
98
- indices = torch.empty(N, dtype=torch.int32, device=coords.device)
99
-
100
- _backend.morton3D(coords.int(), N, indices)
101
-
102
- return indices
103
-
104
- morton3D = _morton3D.apply
105
-
106
- class _morton3D_invert(Function):
107
- @staticmethod
108
- def forward(ctx, indices):
109
- ''' morton3D_invert, CUDA implementation
110
- Args:
111
- indices: [N], int32, in [0, 128^3)
112
- Returns:
113
- coords: [N, 3], int32, in [0, 128)
114
-
115
- '''
116
- if not indices.is_cuda: indices = indices.cuda()
117
-
118
- N = indices.shape[0]
119
-
120
- coords = torch.empty(N, 3, dtype=torch.int32, device=indices.device)
121
-
122
- _backend.morton3D_invert(indices.int(), N, coords)
123
-
124
- return coords
125
-
126
- morton3D_invert = _morton3D_invert.apply
127
-
128
-
129
- class _packbits(Function):
130
- @staticmethod
131
- @custom_fwd(cast_inputs=torch.float32)
132
- def forward(ctx, grid, thresh, bitfield=None):
133
- ''' packbits, CUDA implementation
134
- Pack up the density grid into a bit field to accelerate ray marching.
135
- Args:
136
- grid: float, [C, H * H * H], assume H % 2 == 0
137
- thresh: float, threshold
138
- Returns:
139
- bitfield: uint8, [C, H * H * H / 8]
140
- '''
141
- if not grid.is_cuda: grid = grid.cuda()
142
- grid = grid.contiguous()
143
-
144
- C = grid.shape[0]
145
- H3 = grid.shape[1]
146
- N = C * H3 // 8
147
-
148
- if bitfield is None:
149
- bitfield = torch.empty(N, dtype=torch.uint8, device=grid.device)
150
-
151
- _backend.packbits(grid, N, thresh, bitfield)
152
-
153
- return bitfield
154
-
155
- packbits = _packbits.apply
156
-
157
- # ----------------------------------------
158
- # train functions
159
- # ----------------------------------------
160
-
161
- class _march_rays_train(Function):
162
- @staticmethod
163
- @custom_fwd(cast_inputs=torch.float32)
164
- def forward(ctx, rays_o, rays_d, bound, density_bitfield, C, H, nears, fars, step_counter=None, mean_count=-1, perturb=False, align=-1, force_all_rays=False, dt_gamma=0, max_steps=1024):
165
- ''' march rays to generate points (forward only)
166
- Args:
167
- rays_o/d: float, [N, 3]
168
- bound: float, scalar
169
- density_bitfield: uint8: [CHHH // 8]
170
- C: int
171
- H: int
172
- nears/fars: float, [N]
173
- step_counter: int32, (2), used to count the actual number of generated points.
174
- mean_count: int32, estimated mean steps to accelerate training. (but will randomly drop rays if the actual point count exceeded this threshold.)
175
- perturb: bool
176
- align: int, pad output so its size is dividable by align, set to -1 to disable.
177
- force_all_rays: bool, ignore step_counter and mean_count, always calculate all rays. Useful if rendering the whole image, instead of some rays.
178
- dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance)
179
- max_steps: int, max number of sampled points along each ray, also affect min_stepsize.
180
- Returns:
181
- xyzs: float, [M, 3], all generated points' coords. (all rays concated, need to use `rays` to extract points belonging to each ray)
182
- dirs: float, [M, 3], all generated points' view dirs.
183
- deltas: float, [M, 2], all generated points' deltas. (first for RGB, second for Depth)
184
- rays: int32, [N, 3], all rays' (index, point_offset, point_count), e.g., xyzs[rays[i, 1]:rays[i, 2]] --> points belonging to rays[i, 0]
185
- '''
186
-
187
- if not rays_o.is_cuda: rays_o = rays_o.cuda()
188
- if not rays_d.is_cuda: rays_d = rays_d.cuda()
189
- if not density_bitfield.is_cuda: density_bitfield = density_bitfield.cuda()
190
-
191
- rays_o = rays_o.contiguous().view(-1, 3)
192
- rays_d = rays_d.contiguous().view(-1, 3)
193
- density_bitfield = density_bitfield.contiguous()
194
-
195
- N = rays_o.shape[0] # num rays
196
- M = N * max_steps # init max points number in total
197
-
198
- # running average based on previous epoch (mimic `measured_batch_size_before_compaction` in instant-ngp)
199
- # It estimate the max points number to enable faster training, but will lead to random ignored rays if underestimated.
200
- if not force_all_rays and mean_count > 0:
201
- if align > 0:
202
- mean_count += align - mean_count % align
203
- M = mean_count
204
-
205
- xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device)
206
- dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device)
207
- deltas = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device)
208
- rays = torch.empty(N, 3, dtype=torch.int32, device=rays_o.device) # id, offset, num_steps
209
-
210
- if step_counter is None:
211
- step_counter = torch.zeros(2, dtype=torch.int32, device=rays_o.device) # point counter, ray counter
212
-
213
- if perturb:
214
- noises = torch.rand(N, dtype=rays_o.dtype, device=rays_o.device)
215
- else:
216
- noises = torch.zeros(N, dtype=rays_o.dtype, device=rays_o.device)
217
-
218
- _backend.march_rays_train(rays_o, rays_d, density_bitfield, bound, dt_gamma, max_steps, N, C, H, M, nears, fars, xyzs, dirs, deltas, rays, step_counter, noises) # m is the actually used points number
219
-
220
- #print(step_counter, M)
221
-
222
- # only used at the first (few) epochs.
223
- if force_all_rays or mean_count <= 0:
224
- m = step_counter[0].item() # D2H copy
225
- if align > 0:
226
- m += align - m % align
227
- xyzs = xyzs[:m]
228
- dirs = dirs[:m]
229
- deltas = deltas[:m]
230
-
231
- torch.cuda.empty_cache()
232
-
233
- return xyzs, dirs, deltas, rays
234
-
235
- march_rays_train = _march_rays_train.apply
236
-
237
-
238
- class _composite_rays_train(Function):
239
- @staticmethod
240
- @custom_fwd(cast_inputs=torch.float32)
241
- def forward(ctx, sigmas, rgbs, deltas, rays, T_thresh=1e-4):
242
- ''' composite rays' rgbs, according to the ray marching formula.
243
- Args:
244
- rgbs: float, [M, 3]
245
- sigmas: float, [M,]
246
- deltas: float, [M, 2]
247
- rays: int32, [N, 3]
248
- Returns:
249
- weights_sum: float, [N,], the alpha channel
250
- depth: float, [N, ], the Depth
251
- image: float, [N, 3], the RGB channel (after multiplying alpha!)
252
- '''
253
-
254
- sigmas = sigmas.contiguous()
255
- rgbs = rgbs.contiguous()
256
-
257
- M = sigmas.shape[0]
258
- N = rays.shape[0]
259
-
260
- weights_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device)
261
- depth = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device)
262
- image = torch.empty(N, 3, dtype=sigmas.dtype, device=sigmas.device)
263
-
264
- _backend.composite_rays_train_forward(sigmas, rgbs, deltas, rays, M, N, T_thresh, weights_sum, depth, image)
265
-
266
- ctx.save_for_backward(sigmas, rgbs, deltas, rays, weights_sum, depth, image)
267
- ctx.dims = [M, N, T_thresh]
268
-
269
- return weights_sum, depth, image
270
-
271
- @staticmethod
272
- @custom_bwd
273
- def backward(ctx, grad_weights_sum, grad_depth, grad_image):
274
-
275
- # NOTE: grad_depth is not used now! It won't be propagated to sigmas.
276
-
277
- grad_weights_sum = grad_weights_sum.contiguous()
278
- grad_image = grad_image.contiguous()
279
-
280
- sigmas, rgbs, deltas, rays, weights_sum, depth, image = ctx.saved_tensors
281
- M, N, T_thresh = ctx.dims
282
-
283
- grad_sigmas = torch.zeros_like(sigmas)
284
- grad_rgbs = torch.zeros_like(rgbs)
285
-
286
- _backend.composite_rays_train_backward(grad_weights_sum, grad_image, sigmas, rgbs, deltas, rays, weights_sum, image, M, N, T_thresh, grad_sigmas, grad_rgbs)
287
-
288
- return grad_sigmas, grad_rgbs, None, None, None
289
-
290
-
291
- composite_rays_train = _composite_rays_train.apply
292
-
293
- # ----------------------------------------
294
- # infer functions
295
- # ----------------------------------------
296
-
297
- class _march_rays(Function):
298
- @staticmethod
299
- @custom_fwd(cast_inputs=torch.float32)
300
- def forward(ctx, n_alive, n_step, rays_alive, rays_t, rays_o, rays_d, bound, density_bitfield, C, H, near, far, align=-1, perturb=False, dt_gamma=0, max_steps=1024):
301
- ''' march rays to generate points (forward only, for inference)
302
- Args:
303
- n_alive: int, number of alive rays
304
- n_step: int, how many steps we march
305
- rays_alive: int, [N], the alive rays' IDs in N (N >= n_alive, but we only use first n_alive)
306
- rays_t: float, [N], the alive rays' time, we only use the first n_alive.
307
- rays_o/d: float, [N, 3]
308
- bound: float, scalar
309
- density_bitfield: uint8: [CHHH // 8]
310
- C: int
311
- H: int
312
- nears/fars: float, [N]
313
- align: int, pad output so its size is dividable by align, set to -1 to disable.
314
- perturb: bool/int, int > 0 is used as the random seed.
315
- dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance)
316
- max_steps: int, max number of sampled points along each ray, also affect min_stepsize.
317
- Returns:
318
- xyzs: float, [n_alive * n_step, 3], all generated points' coords
319
- dirs: float, [n_alive * n_step, 3], all generated points' view dirs.
320
- deltas: float, [n_alive * n_step, 2], all generated points' deltas (here we record two deltas, the first is for RGB, the second for depth).
321
- '''
322
-
323
- if not rays_o.is_cuda: rays_o = rays_o.cuda()
324
- if not rays_d.is_cuda: rays_d = rays_d.cuda()
325
-
326
- rays_o = rays_o.contiguous().view(-1, 3)
327
- rays_d = rays_d.contiguous().view(-1, 3)
328
-
329
- M = n_alive * n_step
330
-
331
- if align > 0:
332
- M += align - (M % align)
333
-
334
- xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device)
335
- dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device)
336
- deltas = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device) # 2 vals, one for rgb, one for depth
337
-
338
- if perturb:
339
- # torch.manual_seed(perturb) # test_gui uses spp index as seed
340
- noises = torch.rand(n_alive, dtype=rays_o.dtype, device=rays_o.device)
341
- else:
342
- noises = torch.zeros(n_alive, dtype=rays_o.dtype, device=rays_o.device)
343
-
344
- _backend.march_rays(n_alive, n_step, rays_alive, rays_t, rays_o, rays_d, bound, dt_gamma, max_steps, C, H, density_bitfield, near, far, xyzs, dirs, deltas, noises)
345
-
346
- return xyzs, dirs, deltas
347
-
348
- march_rays = _march_rays.apply
349
-
350
-
351
- class _composite_rays(Function):
352
- @staticmethod
353
- @custom_fwd(cast_inputs=torch.float32) # need to cast sigmas & rgbs to float
354
- def forward(ctx, n_alive, n_step, rays_alive, rays_t, sigmas, rgbs, deltas, weights_sum, depth, image, T_thresh=1e-2):
355
- ''' composite rays' rgbs, according to the ray marching formula. (for inference)
356
- Args:
357
- n_alive: int, number of alive rays
358
- n_step: int, how many steps we march
359
- rays_alive: int, [n_alive], the alive rays' IDs in N (N >= n_alive)
360
- rays_t: float, [N], the alive rays' time
361
- sigmas: float, [n_alive * n_step,]
362
- rgbs: float, [n_alive * n_step, 3]
363
- deltas: float, [n_alive * n_step, 2], all generated points' deltas (here we record two deltas, the first is for RGB, the second for depth).
364
- In-place Outputs:
365
- weights_sum: float, [N,], the alpha channel
366
- depth: float, [N,], the depth value
367
- image: float, [N, 3], the RGB channel (after multiplying alpha!)
368
- '''
369
- _backend.composite_rays(n_alive, n_step, T_thresh, rays_alive, rays_t, sigmas, rgbs, deltas, weights_sum, depth, image)
370
- return tuple()
371
-
372
-
373
- composite_rays = _composite_rays.apply
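A minimal sketch of calling one of these removed wrappers, following the shapes documented in the docstrings above (requires a GPU and the compiled _raymarching extension); the tensor values are placeholders:

import torch
import raymarching

N = 1024
rays_o = torch.zeros(N, 3, device='cuda')                                          # ray origins, [N, 3]
rays_d = torch.nn.functional.normalize(torch.randn(N, 3, device='cuda'), dim=-1)   # unit view directions, [N, 3]
aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device='cuda')              # (xmin, ymin, zmin, xmax, ymax, zmax)

nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, aabb, 0.05)           # per-ray entry/exit t, each [N]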
raymarching/setup.py DELETED
@@ -1,62 +0,0 @@
- import os
- from setuptools import setup
- from torch.utils.cpp_extension import BuildExtension, CUDAExtension
-
- _src_path = os.path.dirname(os.path.abspath(__file__))
-
- nvcc_flags = [
-     '-O3', '-std=c++14',
-     '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__',
- ]
-
- if os.name == "posix":
-     c_flags = ['-O3', '-std=c++14']
- elif os.name == "nt":
-     c_flags = ['/O2', '/std:c++17']
-
-     # find cl.exe
-     def find_cl_path():
-         import glob
-         for edition in ["Enterprise", "Professional", "BuildTools", "Community"]:
-             paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True)
-             if paths:
-                 return paths[0]
-
-     # If cl.exe is not on path, try to find it.
-     if os.system("where cl.exe >nul 2>nul") != 0:
-         cl_path = find_cl_path()
-         if cl_path is None:
-             raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation")
-         os.environ["PATH"] += ";" + cl_path
-
- '''
- Usage:
-
- python setup.py build_ext --inplace # build extensions locally, do not install (only can be used from the parent directory)
-
- python setup.py install # build extensions and install (copy) to PATH.
- pip install . # ditto but better (e.g., dependency & metadata handling)
-
- python setup.py develop # build extensions and install (symbolic) to PATH.
- pip install -e . # ditto but better (e.g., dependency & metadata handling)
-
- '''
- setup(
-     name='raymarching', # package name, import this to use python API
-     ext_modules=[
-         CUDAExtension(
-             name='_raymarching', # extension name, import this to use CUDA API
-             sources=[os.path.join(_src_path, 'src', f) for f in [
-                 'raymarching.cu',
-                 'bindings.cpp',
-             ]],
-             extra_compile_args={
-                 'cxx': c_flags,
-                 'nvcc': nvcc_flags,
-             }
-         ),
-     ],
-     cmdclass={
-         'build_ext': BuildExtension,
-     }
- )
raymarching/src/bindings.cpp DELETED
@@ -1,19 +0,0 @@
- #include <torch/extension.h>
-
- #include "raymarching.h"
-
- PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-     // utils
-     m.def("packbits", &packbits, "packbits (CUDA)");
-     m.def("near_far_from_aabb", &near_far_from_aabb, "near_far_from_aabb (CUDA)");
-     m.def("sph_from_ray", &sph_from_ray, "sph_from_ray (CUDA)");
-     m.def("morton3D", &morton3D, "morton3D (CUDA)");
-     m.def("morton3D_invert", &morton3D_invert, "morton3D_invert (CUDA)");
-     // train
-     m.def("march_rays_train", &march_rays_train, "march_rays_train (CUDA)");
-     m.def("composite_rays_train_forward", &composite_rays_train_forward, "composite_rays_train_forward (CUDA)");
-     m.def("composite_rays_train_backward", &composite_rays_train_backward, "composite_rays_train_backward (CUDA)");
-     // infer
-     m.def("march_rays", &march_rays, "march rays (CUDA)");
-     m.def("composite_rays", &composite_rays, "composite rays (CUDA)");
- }
raymarching/src/raymarching.cu DELETED
@@ -1,914 +0,0 @@
1
- #include <cuda.h>
2
- #include <cuda_fp16.h>
3
- #include <cuda_runtime.h>
4
-
5
- #include <ATen/cuda/CUDAContext.h>
6
- #include <torch/torch.h>
7
-
8
- #include <cstdio>
9
- #include <stdint.h>
10
- #include <stdexcept>
11
- #include <limits>
12
-
13
- #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor")
14
- #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor")
15
- #define CHECK_IS_INT(x) TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, #x " must be an int tensor")
16
- #define CHECK_IS_FLOATING(x) TORCH_CHECK(x.scalar_type() == at::ScalarType::Float || x.scalar_type() == at::ScalarType::Half || x.scalar_type() == at::ScalarType::Double, #x " must be a floating tensor")
17
-
18
-
19
- inline constexpr __device__ float SQRT3() { return 1.7320508075688772f; }
20
- inline constexpr __device__ float RSQRT3() { return 0.5773502691896258f; }
21
- inline constexpr __device__ float PI() { return 3.141592653589793f; }
22
- inline constexpr __device__ float RPI() { return 0.3183098861837907f; }
23
-
24
-
25
- template <typename T>
26
- inline __host__ __device__ T div_round_up(T val, T divisor) {
27
- return (val + divisor - 1) / divisor;
28
- }
29
-
30
- inline __host__ __device__ float signf(const float x) {
31
- return copysignf(1.0, x);
32
- }
33
-
34
- inline __host__ __device__ float clamp(const float x, const float min, const float max) {
35
- return fminf(max, fmaxf(min, x));
36
- }
37
-
38
- inline __host__ __device__ void swapf(float& a, float& b) {
39
- float c = a; a = b; b = c;
40
- }
41
-
42
- inline __device__ int mip_from_pos(const float x, const float y, const float z, const float max_cascade) {
43
- const float mx = fmaxf(fabsf(x), fmaxf(fabs(y), fabs(z)));
44
- int exponent;
45
- frexpf(mx, &exponent); // [0, 0.5) --> -1, [0.5, 1) --> 0, [1, 2) --> 1, [2, 4) --> 2, ...
46
- return fminf(max_cascade - 1, fmaxf(0, exponent));
47
- }
48
-
49
- inline __device__ int mip_from_dt(const float dt, const float H, const float max_cascade) {
50
- const float mx = dt * H * 0.5;
51
- int exponent;
52
- frexpf(mx, &exponent);
53
- return fminf(max_cascade - 1, fmaxf(0, exponent));
54
- }
55
-
56
- inline __host__ __device__ uint32_t __expand_bits(uint32_t v)
57
- {
58
- v = (v * 0x00010001u) & 0xFF0000FFu;
59
- v = (v * 0x00000101u) & 0x0F00F00Fu;
60
- v = (v * 0x00000011u) & 0xC30C30C3u;
61
- v = (v * 0x00000005u) & 0x49249249u;
62
- return v;
63
- }
64
-
65
- inline __host__ __device__ uint32_t __morton3D(uint32_t x, uint32_t y, uint32_t z)
66
- {
67
- uint32_t xx = __expand_bits(x);
68
- uint32_t yy = __expand_bits(y);
69
- uint32_t zz = __expand_bits(z);
70
- return xx | (yy << 1) | (zz << 2);
71
- }
72
-
73
- inline __host__ __device__ uint32_t __morton3D_invert(uint32_t x)
74
- {
75
- x = x & 0x49249249;
76
- x = (x | (x >> 2)) & 0xc30c30c3;
77
- x = (x | (x >> 4)) & 0x0f00f00f;
78
- x = (x | (x >> 8)) & 0xff0000ff;
79
- x = (x | (x >> 16)) & 0x0000ffff;
80
- return x;
81
- }
82
-
83
-
84
- ////////////////////////////////////////////////////
85
- ///////////// utils /////////////
86
- ////////////////////////////////////////////////////
87
-
88
- // rays_o/d: [N, 3]
89
- // nears/fars: [N]
90
- // scalar_t should always be float in use.
91
- template <typename scalar_t>
92
- __global__ void kernel_near_far_from_aabb(
93
- const scalar_t * __restrict__ rays_o,
94
- const scalar_t * __restrict__ rays_d,
95
- const scalar_t * __restrict__ aabb,
96
- const uint32_t N,
97
- const float min_near,
98
- scalar_t * nears, scalar_t * fars
99
- ) {
100
- // parallel per ray
101
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
102
- if (n >= N) return;
103
-
104
- // locate
105
- rays_o += n * 3;
106
- rays_d += n * 3;
107
-
108
- const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2];
109
- const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2];
110
- const float rdx = 1 / dx, rdy = 1 / dy, rdz = 1 / dz;
111
-
112
- // get near far (assume cube scene)
113
- float near = (aabb[0] - ox) * rdx;
114
- float far = (aabb[3] - ox) * rdx;
115
- if (near > far) swapf(near, far);
116
-
117
- float near_y = (aabb[1] - oy) * rdy;
118
- float far_y = (aabb[4] - oy) * rdy;
119
- if (near_y > far_y) swapf(near_y, far_y);
120
-
121
- if (near > far_y || near_y > far) {
122
- nears[n] = fars[n] = std::numeric_limits<scalar_t>::max();
123
- return;
124
- }
125
-
126
- if (near_y > near) near = near_y;
127
- if (far_y < far) far = far_y;
128
-
129
- float near_z = (aabb[2] - oz) * rdz;
130
- float far_z = (aabb[5] - oz) * rdz;
131
- if (near_z > far_z) swapf(near_z, far_z);
132
-
133
- if (near > far_z || near_z > far) {
134
- nears[n] = fars[n] = std::numeric_limits<scalar_t>::max();
135
- return;
136
- }
137
-
138
- if (near_z > near) near = near_z;
139
- if (far_z < far) far = far_z;
140
-
141
- if (near < min_near) near = min_near;
142
-
143
- nears[n] = near;
144
- fars[n] = far;
145
- }
146
-
147
-
148
- void near_far_from_aabb(const at::Tensor rays_o, const at::Tensor rays_d, const at::Tensor aabb, const uint32_t N, const float min_near, at::Tensor nears, at::Tensor fars) {
149
-
150
- static constexpr uint32_t N_THREAD = 128;
151
-
152
- AT_DISPATCH_FLOATING_TYPES_AND_HALF(
153
- rays_o.scalar_type(), "near_far_from_aabb", ([&] {
154
- kernel_near_far_from_aabb<<<div_round_up(N, N_THREAD), N_THREAD>>>(rays_o.data_ptr<scalar_t>(), rays_d.data_ptr<scalar_t>(), aabb.data_ptr<scalar_t>(), N, min_near, nears.data_ptr<scalar_t>(), fars.data_ptr<scalar_t>());
155
- }));
156
- }
157
-
158
-
159
- // rays_o/d: [N, 3]
160
- // radius: float
161
- // coords: [N, 2]
162
- template <typename scalar_t>
163
- __global__ void kernel_sph_from_ray(
164
- const scalar_t * __restrict__ rays_o,
165
- const scalar_t * __restrict__ rays_d,
166
- const float radius,
167
- const uint32_t N,
168
- scalar_t * coords
169
- ) {
170
- // parallel per ray
171
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
172
- if (n >= N) return;
173
-
174
- // locate
175
- rays_o += n * 3;
176
- rays_d += n * 3;
177
- coords += n * 2;
178
-
179
- const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2];
180
- const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2];
181
- const float rdx = 1 / dx, rdy = 1 / dy, rdz = 1 / dz;
182
-
183
- // solve t from || o + td || = radius
184
- const float A = dx * dx + dy * dy + dz * dz;
185
- const float B = ox * dx + oy * dy + oz * dz; // in fact B / 2
186
- const float C = ox * ox + oy * oy + oz * oz - radius * radius;
187
-
188
- const float t = (- B + sqrtf(B * B - A * C)) / A; // always use the larger solution (positive)
189
-
190
- // solve theta, phi (assume y is the up axis)
191
- const float x = ox + t * dx, y = oy + t * dy, z = oz + t * dz;
192
- const float theta = atan2(sqrtf(x * x + z * z), y); // [0, PI)
193
- const float phi = atan2(z, x); // [-PI, PI)
194
-
195
- // normalize to [-1, 1]
196
- coords[0] = 2 * theta * RPI() - 1;
197
- coords[1] = phi * RPI();
198
- }
199
-
200
-
201
- void sph_from_ray(const at::Tensor rays_o, const at::Tensor rays_d, const float radius, const uint32_t N, at::Tensor coords) {
202
-
203
- static constexpr uint32_t N_THREAD = 128;
204
-
205
- AT_DISPATCH_FLOATING_TYPES_AND_HALF(
206
- rays_o.scalar_type(), "sph_from_ray", ([&] {
207
- kernel_sph_from_ray<<<div_round_up(N, N_THREAD), N_THREAD>>>(rays_o.data_ptr<scalar_t>(), rays_d.data_ptr<scalar_t>(), radius, N, coords.data_ptr<scalar_t>());
208
- }));
209
- }
210
-
211
-
212
- // coords: int32, [N, 3]
213
- // indices: int32, [N]
214
- __global__ void kernel_morton3D(
215
- const int * __restrict__ coords,
216
- const uint32_t N,
217
- int * indices
218
- ) {
219
- // parallel
220
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
221
- if (n >= N) return;
222
-
223
- // locate
224
- coords += n * 3;
225
- indices[n] = __morton3D(coords[0], coords[1], coords[2]);
226
- }
227
-
228
-
229
- void morton3D(const at::Tensor coords, const uint32_t N, at::Tensor indices) {
230
- static constexpr uint32_t N_THREAD = 128;
231
- kernel_morton3D<<<div_round_up(N, N_THREAD), N_THREAD>>>(coords.data_ptr<int>(), N, indices.data_ptr<int>());
232
- }
233
-
234
-
235
- // indices: int32, [N]
236
- // coords: int32, [N, 3]
237
- __global__ void kernel_morton3D_invert(
238
- const int * __restrict__ indices,
239
- const uint32_t N,
240
- int * coords
241
- ) {
242
- // parallel
243
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
244
- if (n >= N) return;
245
-
246
- // locate
247
- coords += n * 3;
248
-
249
- const int ind = indices[n];
250
-
251
- coords[0] = __morton3D_invert(ind >> 0);
252
- coords[1] = __morton3D_invert(ind >> 1);
253
- coords[2] = __morton3D_invert(ind >> 2);
254
- }
255
-
256
-
257
- void morton3D_invert(const at::Tensor indices, const uint32_t N, at::Tensor coords) {
258
- static constexpr uint32_t N_THREAD = 128;
259
- kernel_morton3D_invert<<<div_round_up(N, N_THREAD), N_THREAD>>>(indices.data_ptr<int>(), N, coords.data_ptr<int>());
260
- }
261
-
262
-
263
- // grid: float, [C, H, H, H]
264
- // N: int, C * H * H * H / 8
265
- // density_thresh: float
266
- // bitfield: uint8, [N]
267
- template <typename scalar_t>
268
- __global__ void kernel_packbits(
269
- const scalar_t * __restrict__ grid,
270
- const uint32_t N,
271
- const float density_thresh,
272
- uint8_t * bitfield
273
- ) {
274
- // parallel per byte
275
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
276
- if (n >= N) return;
277
-
278
- // locate
279
- grid += n * 8;
280
-
281
- uint8_t bits = 0;
282
-
283
- #pragma unroll
284
- for (uint8_t i = 0; i < 8; i++) {
285
- bits |= (grid[i] > density_thresh) ? ((uint8_t)1 << i) : 0;
286
- }
287
-
288
- bitfield[n] = bits;
289
- }
290
-
291
-
292
- void packbits(const at::Tensor grid, const uint32_t N, const float density_thresh, at::Tensor bitfield) {
293
-
294
- static constexpr uint32_t N_THREAD = 128;
295
-
296
- AT_DISPATCH_FLOATING_TYPES_AND_HALF(
297
- grid.scalar_type(), "packbits", ([&] {
298
- kernel_packbits<<<div_round_up(N, N_THREAD), N_THREAD>>>(grid.data_ptr<scalar_t>(), N, density_thresh, bitfield.data_ptr<uint8_t>());
299
- }));
300
- }
301
-
302
- ////////////////////////////////////////////////////
303
- ///////////// training /////////////
304
- ////////////////////////////////////////////////////
305
-
306
- // rays_o/d: [N, 3]
307
- // grid: [CHHH / 8]
308
- // xyzs, dirs, deltas: [M, 3], [M, 3], [M, 2]
309
- // dirs: [M, 3]
310
- // rays: [N, 3], idx, offset, num_steps
311
- template <typename scalar_t>
312
- __global__ void kernel_march_rays_train(
313
- const scalar_t * __restrict__ rays_o,
314
- const scalar_t * __restrict__ rays_d,
315
- const uint8_t * __restrict__ grid,
316
- const float bound,
317
- const float dt_gamma, const uint32_t max_steps,
318
- const uint32_t N, const uint32_t C, const uint32_t H, const uint32_t M,
319
- const scalar_t* __restrict__ nears,
320
- const scalar_t* __restrict__ fars,
321
- scalar_t * xyzs, scalar_t * dirs, scalar_t * deltas,
322
- int * rays,
323
- int * counter,
324
- const scalar_t* __restrict__ noises
325
- ) {
326
- // parallel per ray
327
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
328
- if (n >= N) return;
329
-
330
- // locate
331
- rays_o += n * 3;
332
- rays_d += n * 3;
333
-
334
- // ray marching
335
- const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2];
336
- const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2];
337
- const float rdx = 1 / dx, rdy = 1 / dy, rdz = 1 / dz;
338
- const float rH = 1 / (float)H;
339
- const float H3 = H * H * H;
340
-
341
- const float near = nears[n];
342
- const float far = fars[n];
343
- const float noise = noises[n];
344
-
345
- const float dt_min = 2 * SQRT3() / max_steps;
346
- const float dt_max = 2 * SQRT3() * (1 << (C - 1)) / H;
347
-
348
- float t0 = near;
349
-
350
- // perturb
351
- t0 += clamp(t0 * dt_gamma, dt_min, dt_max) * noise;
352
-
353
- // first pass: estimation of num_steps
354
- float t = t0;
355
- uint32_t num_steps = 0;
356
-
357
- //if (t < far) printf("valid ray %d t=%f near=%f far=%f \n", n, t, near, far);
358
-
359
- while (t < far && num_steps < max_steps) {
360
- // current point
361
- const float x = clamp(ox + t * dx, -bound, bound);
362
- const float y = clamp(oy + t * dy, -bound, bound);
363
- const float z = clamp(oz + t * dz, -bound, bound);
364
-
365
- const float dt = clamp(t * dt_gamma, dt_min, dt_max);
366
-
367
- // get mip level
368
- const int level = max(mip_from_pos(x, y, z, C), mip_from_dt(dt, H, C)); // range in [0, C - 1]
369
-
370
- const float mip_bound = fminf(scalbnf(1.0f, level), bound);
371
- const float mip_rbound = 1 / mip_bound;
372
-
373
- // convert to nearest grid position
374
- const int nx = clamp(0.5 * (x * mip_rbound + 1) * H, 0.0f, (float)(H - 1));
375
- const int ny = clamp(0.5 * (y * mip_rbound + 1) * H, 0.0f, (float)(H - 1));
376
- const int nz = clamp(0.5 * (z * mip_rbound + 1) * H, 0.0f, (float)(H - 1));
377
-
378
- const uint32_t index = level * H3 + __morton3D(nx, ny, nz);
379
- const bool occ = grid[index / 8] & (1 << (index % 8));
380
-
381
- // if occpuied, advance a small step, and write to output
382
- //if (n == 0) printf("t=%f density=%f vs thresh=%f step=%d\n", t, density, density_thresh, num_steps);
383
-
384
- if (occ) {
385
- num_steps++;
386
- t += dt;
387
- // else, skip a large step (basically skip a voxel grid)
388
- } else {
389
- // calc distance to next voxel
390
- const float tx = (((nx + 0.5f + 0.5f * signf(dx)) * rH * 2 - 1) * mip_bound - x) * rdx;
391
- const float ty = (((ny + 0.5f + 0.5f * signf(dy)) * rH * 2 - 1) * mip_bound - y) * rdy;
392
- const float tz = (((nz + 0.5f + 0.5f * signf(dz)) * rH * 2 - 1) * mip_bound - z) * rdz;
393
-
394
- const float tt = t + fmaxf(0.0f, fminf(tx, fminf(ty, tz)));
395
- // step until next voxel
396
- do {
397
- t += clamp(t * dt_gamma, dt_min, dt_max);
398
- } while (t < tt);
399
- }
400
- }
401
-
402
- //printf("[n=%d] num_steps=%d, near=%f, far=%f, dt=%f, max_steps=%f\n", n, num_steps, near, far, dt_min, (far - near) / dt_min);
403
-
404
- // second pass: really locate and write points & dirs
405
- uint32_t point_index = atomicAdd(counter, num_steps);
406
- uint32_t ray_index = atomicAdd(counter + 1, 1);
407
-
408
- //printf("[n=%d] num_steps=%d, point_index=%d, ray_index=%d\n", n, num_steps, point_index, ray_index);
409
-
410
- // write rays
411
- rays[ray_index * 3] = n;
412
- rays[ray_index * 3 + 1] = point_index;
413
- rays[ray_index * 3 + 2] = num_steps;
414
-
415
- if (num_steps == 0) return;
416
- if (point_index + num_steps > M) return;
417
-
418
- xyzs += point_index * 3;
419
- dirs += point_index * 3;
420
- deltas += point_index * 2;
421
-
422
- t = t0;
423
- uint32_t step = 0;
424
-
425
- float last_t = t;
426
-
427
- while (t < far && step < num_steps) {
428
- // current point
429
- const float x = clamp(ox + t * dx, -bound, bound);
430
- const float y = clamp(oy + t * dy, -bound, bound);
431
- const float z = clamp(oz + t * dz, -bound, bound);
432
-
433
- const float dt = clamp(t * dt_gamma, dt_min, dt_max);
434
-
435
- // get mip level
436
- const int level = max(mip_from_pos(x, y, z, C), mip_from_dt(dt, H, C)); // range in [0, C - 1]
437
-
438
- const float mip_bound = fminf(scalbnf(1.0f, level), bound);
439
- const float mip_rbound = 1 / mip_bound;
440
-
441
- // convert to nearest grid position
442
- const int nx = clamp(0.5 * (x * mip_rbound + 1) * H, 0.0f, (float)(H - 1));
443
- const int ny = clamp(0.5 * (y * mip_rbound + 1) * H, 0.0f, (float)(H - 1));
444
- const int nz = clamp(0.5 * (z * mip_rbound + 1) * H, 0.0f, (float)(H - 1));
445
-
446
- // query grid
447
- const uint32_t index = level * H3 + __morton3D(nx, ny, nz);
448
- const bool occ = grid[index / 8] & (1 << (index % 8));
449
-
450
- // if occpuied, advance a small step, and write to output
451
- if (occ) {
452
- // write step
453
- xyzs[0] = x;
454
- xyzs[1] = y;
455
- xyzs[2] = z;
456
- dirs[0] = dx;
457
- dirs[1] = dy;
458
- dirs[2] = dz;
459
- t += dt;
460
- deltas[0] = dt;
461
- deltas[1] = t - last_t; // used to calc depth
462
- last_t = t;
463
- xyzs += 3;
464
- dirs += 3;
465
- deltas += 2;
466
- step++;
467
- // else, skip a large step (basically skip a voxel grid)
468
- } else {
469
- // calc distance to next voxel
470
- const float tx = (((nx + 0.5f + 0.5f * signf(dx)) * rH * 2 - 1) * mip_bound - x) * rdx;
471
- const float ty = (((ny + 0.5f + 0.5f * signf(dy)) * rH * 2 - 1) * mip_bound - y) * rdy;
472
- const float tz = (((nz + 0.5f + 0.5f * signf(dz)) * rH * 2 - 1) * mip_bound - z) * rdz;
473
- const float tt = t + fmaxf(0.0f, fminf(tx, fminf(ty, tz)));
474
- // step until next voxel
475
- do {
476
- t += clamp(t * dt_gamma, dt_min, dt_max);
477
- } while (t < tt);
478
- }
479
- }
480
- }
481
-
482
- void march_rays_train(const at::Tensor rays_o, const at::Tensor rays_d, const at::Tensor grid, const float bound, const float dt_gamma, const uint32_t max_steps, const uint32_t N, const uint32_t C, const uint32_t H, const uint32_t M, const at::Tensor nears, const at::Tensor fars, at::Tensor xyzs, at::Tensor dirs, at::Tensor deltas, at::Tensor rays, at::Tensor counter, at::Tensor noises) {
483
-
484
- static constexpr uint32_t N_THREAD = 128;
485
-
486
- AT_DISPATCH_FLOATING_TYPES_AND_HALF(
487
- rays_o.scalar_type(), "march_rays_train", ([&] {
488
- kernel_march_rays_train<<<div_round_up(N, N_THREAD), N_THREAD>>>(rays_o.data_ptr<scalar_t>(), rays_d.data_ptr<scalar_t>(), grid.data_ptr<uint8_t>(), bound, dt_gamma, max_steps, N, C, H, M, nears.data_ptr<scalar_t>(), fars.data_ptr<scalar_t>(), xyzs.data_ptr<scalar_t>(), dirs.data_ptr<scalar_t>(), deltas.data_ptr<scalar_t>(), rays.data_ptr<int>(), counter.data_ptr<int>(), noises.data_ptr<scalar_t>());
489
- }));
490
- }
491
-
492
-
493
- // sigmas: [M]
494
- // rgbs: [M, 3]
495
- // deltas: [M, 2]
496
- // rays: [N, 3], idx, offset, num_steps
497
- // weights_sum: [N], final pixel alpha
498
- // depth: [N,]
499
- // image: [N, 3]
500
- template <typename scalar_t>
501
- __global__ void kernel_composite_rays_train_forward(
502
- const scalar_t * __restrict__ sigmas,
503
- const scalar_t * __restrict__ rgbs,
504
- const scalar_t * __restrict__ deltas,
505
- const int * __restrict__ rays,
506
- const uint32_t M, const uint32_t N, const float T_thresh,
507
- scalar_t * weights_sum,
508
- scalar_t * depth,
509
- scalar_t * image
510
- ) {
511
- // parallel per ray
512
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
513
- if (n >= N) return;
514
-
515
- // locate
516
- uint32_t index = rays[n * 3];
517
- uint32_t offset = rays[n * 3 + 1];
518
- uint32_t num_steps = rays[n * 3 + 2];
519
-
520
- // empty ray, or ray that exceed max step count.
521
- if (num_steps == 0 || offset + num_steps > M) {
522
- weights_sum[index] = 0;
523
- depth[index] = 0;
524
- image[index * 3] = 0;
525
- image[index * 3 + 1] = 0;
526
- image[index * 3 + 2] = 0;
527
- return;
528
- }
529
-
530
- sigmas += offset;
531
- rgbs += offset * 3;
532
- deltas += offset * 2;
533
-
534
- // accumulate
535
- uint32_t step = 0;
536
-
537
- scalar_t T = 1.0f;
538
- scalar_t r = 0, g = 0, b = 0, ws = 0, t = 0, d = 0;
539
-
540
- while (step < num_steps) {
541
-
542
- const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]);
543
- const scalar_t weight = alpha * T;
544
-
545
- r += weight * rgbs[0];
546
- g += weight * rgbs[1];
547
- b += weight * rgbs[2];
548
-
549
- t += deltas[1]; // real delta
550
- d += weight * t;
551
-
552
- ws += weight;
553
-
554
- T *= 1.0f - alpha;
555
-
556
- // minimal remained transmittence
557
- if (T < T_thresh) break;
558
-
559
- //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d);
560
-
561
- // locate
562
- sigmas++;
563
- rgbs += 3;
564
- deltas += 2;
565
-
566
- step++;
567
- }
568
-
569
- //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d);
570
-
571
- // write
572
- weights_sum[index] = ws; // weights_sum
573
- depth[index] = d;
574
- image[index * 3] = r;
575
- image[index * 3 + 1] = g;
576
- image[index * 3 + 2] = b;
577
- }
578
-
579
-
580
- void composite_rays_train_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor depth, at::Tensor image) {
581
-
582
- static constexpr uint32_t N_THREAD = 128;
583
-
584
- AT_DISPATCH_FLOATING_TYPES_AND_HALF(
585
- sigmas.scalar_type(), "composite_rays_train_forward", ([&] {
586
- kernel_composite_rays_train_forward<<<div_round_up(N, N_THREAD), N_THREAD>>>(sigmas.data_ptr<scalar_t>(), rgbs.data_ptr<scalar_t>(), deltas.data_ptr<scalar_t>(), rays.data_ptr<int>(), M, N, T_thresh, weights_sum.data_ptr<scalar_t>(), depth.data_ptr<scalar_t>(), image.data_ptr<scalar_t>());
587
- }));
588
- }
589
-
590
-
591
- // grad_weights_sum: [N,]
592
- // grad: [N, 3]
593
- // sigmas: [M]
594
- // rgbs: [M, 3]
595
- // deltas: [M, 2]
596
- // rays: [N, 3], idx, offset, num_steps
597
- // weights_sum: [N,], weights_sum here
598
- // image: [N, 3]
599
- // grad_sigmas: [M]
600
- // grad_rgbs: [M, 3]
601
- template <typename scalar_t>
602
- __global__ void kernel_composite_rays_train_backward(
603
- const scalar_t * __restrict__ grad_weights_sum,
604
- const scalar_t * __restrict__ grad_image,
605
- const scalar_t * __restrict__ sigmas,
606
- const scalar_t * __restrict__ rgbs,
607
- const scalar_t * __restrict__ deltas,
608
- const int * __restrict__ rays,
609
- const scalar_t * __restrict__ weights_sum,
610
- const scalar_t * __restrict__ image,
611
- const uint32_t M, const uint32_t N, const float T_thresh,
612
- scalar_t * grad_sigmas,
613
- scalar_t * grad_rgbs
614
- ) {
615
- // parallel per ray
616
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
617
- if (n >= N) return;
618
-
619
- // locate
620
- uint32_t index = rays[n * 3];
621
- uint32_t offset = rays[n * 3 + 1];
622
- uint32_t num_steps = rays[n * 3 + 2];
623
-
624
- if (num_steps == 0 || offset + num_steps > M) return;
625
-
626
- grad_weights_sum += index;
627
- grad_image += index * 3;
628
- weights_sum += index;
629
- image += index * 3;
630
- sigmas += offset;
631
- rgbs += offset * 3;
632
- deltas += offset * 2;
633
- grad_sigmas += offset;
634
- grad_rgbs += offset * 3;
635
-
636
- // accumulate
637
- uint32_t step = 0;
638
-
639
- scalar_t T = 1.0f;
640
- const scalar_t r_final = image[0], g_final = image[1], b_final = image[2], ws_final = weights_sum[0];
641
- scalar_t r = 0, g = 0, b = 0, ws = 0;
642
-
643
- while (step < num_steps) {
644
-
645
- const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]);
646
- const scalar_t weight = alpha * T;
647
-
648
- r += weight * rgbs[0];
649
- g += weight * rgbs[1];
650
- b += weight * rgbs[2];
651
- ws += weight;
652
-
653
- T *= 1.0f - alpha;
654
-
655
- // check https://note.kiui.moe/others/nerf_gradient/ for the gradient calculation.
656
- // write grad_rgbs
657
- grad_rgbs[0] = grad_image[0] * weight;
658
- grad_rgbs[1] = grad_image[1] * weight;
659
- grad_rgbs[2] = grad_image[2] * weight;
660
-
661
- // write grad_sigmas
662
- grad_sigmas[0] = deltas[0] * (
663
- grad_image[0] * (T * rgbs[0] - (r_final - r)) +
664
- grad_image[1] * (T * rgbs[1] - (g_final - g)) +
665
- grad_image[2] * (T * rgbs[2] - (b_final - b)) +
666
- grad_weights_sum[0] * (1 - ws_final)
667
- );
668
-
669
- //printf("[n=%d] num_steps=%d, T=%f, grad_sigmas=%f, r_final=%f, r=%f\n", n, step, T, grad_sigmas[0], r_final, r);
670
-             // minimal remaining transmittance
671
- if (T < T_thresh) break;
672
-
673
- // locate
674
- sigmas++;
675
- rgbs += 3;
676
- deltas += 2;
677
- grad_sigmas++;
678
- grad_rgbs += 3;
679
-
680
- step++;
681
- }
682
- }
683
-
684
-
685
- void composite_rays_train_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs) {
686
-
687
- static constexpr uint32_t N_THREAD = 128;
688
-
689
- AT_DISPATCH_FLOATING_TYPES_AND_HALF(
690
- grad_image.scalar_type(), "composite_rays_train_backward", ([&] {
691
- kernel_composite_rays_train_backward<<<div_round_up(N, N_THREAD), N_THREAD>>>(grad_weights_sum.data_ptr<scalar_t>(), grad_image.data_ptr<scalar_t>(), sigmas.data_ptr<scalar_t>(), rgbs.data_ptr<scalar_t>(), deltas.data_ptr<scalar_t>(), rays.data_ptr<int>(), weights_sum.data_ptr<scalar_t>(), image.data_ptr<scalar_t>(), M, N, T_thresh, grad_sigmas.data_ptr<scalar_t>(), grad_rgbs.data_ptr<scalar_t>());
692
- }));
693
- }
694
-
695
-
696
- ////////////////////////////////////////////////////
697
- /////////////            inference            /////////////
698
- ////////////////////////////////////////////////////
699
-
700
- template <typename scalar_t>
701
- __global__ void kernel_march_rays(
702
- const uint32_t n_alive,
703
- const uint32_t n_step,
704
- const int* __restrict__ rays_alive,
705
- const scalar_t* __restrict__ rays_t,
706
- const scalar_t* __restrict__ rays_o,
707
- const scalar_t* __restrict__ rays_d,
708
- const float bound,
709
- const float dt_gamma, const uint32_t max_steps,
710
- const uint32_t C, const uint32_t H,
711
- const uint8_t * __restrict__ grid,
712
- const scalar_t* __restrict__ nears,
713
- const scalar_t* __restrict__ fars,
714
- scalar_t* xyzs, scalar_t* dirs, scalar_t* deltas,
715
- const scalar_t* __restrict__ noises
716
- ) {
717
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
718
- if (n >= n_alive) return;
719
-
720
- const int index = rays_alive[n]; // ray id
721
- const float noise = noises[n];
722
-
723
- // locate
724
- rays_o += index * 3;
725
- rays_d += index * 3;
726
- xyzs += n * n_step * 3;
727
- dirs += n * n_step * 3;
728
- deltas += n * n_step * 2;
729
-
730
- const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2];
731
- const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2];
732
- const float rdx = 1 / dx, rdy = 1 / dy, rdz = 1 / dz;
733
- const float rH = 1 / (float)H;
734
- const float H3 = H * H * H;
735
-
736
- float t = rays_t[index]; // current ray's t
737
- const float near = nears[index], far = fars[index];
738
-
739
- const float dt_min = 2 * SQRT3() / max_steps;
740
- const float dt_max = 2 * SQRT3() * (1 << (C - 1)) / H;
741
-
742
- // march for n_step steps, record points
743
- uint32_t step = 0;
744
-
745
- // introduce some randomness
746
- t += clamp(t * dt_gamma, dt_min, dt_max) * noise;
747
-
748
- float last_t = t;
749
-
750
- while (t < far && step < n_step) {
751
- // current point
752
- const float x = clamp(ox + t * dx, -bound, bound);
753
- const float y = clamp(oy + t * dy, -bound, bound);
754
- const float z = clamp(oz + t * dz, -bound, bound);
755
-
756
- const float dt = clamp(t * dt_gamma, dt_min, dt_max);
757
-
758
- // get mip level
759
- const int level = max(mip_from_pos(x, y, z, C), mip_from_dt(dt, H, C)); // range in [0, C - 1]
760
-
761
- const float mip_bound = fminf(scalbnf(1, level), bound);
762
- const float mip_rbound = 1 / mip_bound;
763
-
764
- // convert to nearest grid position
765
- const int nx = clamp(0.5 * (x * mip_rbound + 1) * H, 0.0f, (float)(H - 1));
766
- const int ny = clamp(0.5 * (y * mip_rbound + 1) * H, 0.0f, (float)(H - 1));
767
- const int nz = clamp(0.5 * (z * mip_rbound + 1) * H, 0.0f, (float)(H - 1));
768
-
769
- const uint32_t index = level * H3 + __morton3D(nx, ny, nz);
770
- const bool occ = grid[index / 8] & (1 << (index % 8));
771
-
772
-         // if occupied, advance a small step and write to output
773
- if (occ) {
774
- // write step
775
- xyzs[0] = x;
776
- xyzs[1] = y;
777
- xyzs[2] = z;
778
- dirs[0] = dx;
779
- dirs[1] = dy;
780
- dirs[2] = dz;
781
- // calc dt
782
- t += dt;
783
- deltas[0] = dt;
784
- deltas[1] = t - last_t; // used to calc depth
785
- last_t = t;
786
- // step
787
- xyzs += 3;
788
- dirs += 3;
789
- deltas += 2;
790
- step++;
791
-
792
- // else, skip a large step (basically skip a voxel grid)
793
- } else {
794
- // calc distance to next voxel
795
- const float tx = (((nx + 0.5f + 0.5f * signf(dx)) * rH * 2 - 1) * mip_bound - x) * rdx;
796
- const float ty = (((ny + 0.5f + 0.5f * signf(dy)) * rH * 2 - 1) * mip_bound - y) * rdy;
797
- const float tz = (((nz + 0.5f + 0.5f * signf(dz)) * rH * 2 - 1) * mip_bound - z) * rdz;
798
- const float tt = t + fmaxf(0.0f, fminf(tx, fminf(ty, tz)));
799
- // step until next voxel
800
- do {
801
- t += clamp(t * dt_gamma, dt_min, dt_max);
802
- } while (t < tt);
803
- }
804
- }
805
- }
806
-
807
-
808
- void march_rays(const uint32_t n_alive, const uint32_t n_step, const at::Tensor rays_alive, const at::Tensor rays_t, const at::Tensor rays_o, const at::Tensor rays_d, const float bound, const float dt_gamma, const uint32_t max_steps, const uint32_t C, const uint32_t H, const at::Tensor grid, const at::Tensor near, const at::Tensor far, at::Tensor xyzs, at::Tensor dirs, at::Tensor deltas, at::Tensor noises) {
809
- static constexpr uint32_t N_THREAD = 128;
810
-
811
- AT_DISPATCH_FLOATING_TYPES_AND_HALF(
812
- rays_o.scalar_type(), "march_rays", ([&] {
813
- kernel_march_rays<<<div_round_up(n_alive, N_THREAD), N_THREAD>>>(n_alive, n_step, rays_alive.data_ptr<int>(), rays_t.data_ptr<scalar_t>(), rays_o.data_ptr<scalar_t>(), rays_d.data_ptr<scalar_t>(), bound, dt_gamma, max_steps, C, H, grid.data_ptr<uint8_t>(), near.data_ptr<scalar_t>(), far.data_ptr<scalar_t>(), xyzs.data_ptr<scalar_t>(), dirs.data_ptr<scalar_t>(), deltas.data_ptr<scalar_t>(), noises.data_ptr<scalar_t>());
814
- }));
815
- }
816
-
817
-
818
- template <typename scalar_t>
819
- __global__ void kernel_composite_rays(
820
- const uint32_t n_alive,
821
- const uint32_t n_step,
822
- const float T_thresh,
823
- int* rays_alive,
824
- scalar_t* rays_t,
825
- const scalar_t* __restrict__ sigmas,
826
- const scalar_t* __restrict__ rgbs,
827
- const scalar_t* __restrict__ deltas,
828
- scalar_t* weights_sum, scalar_t* depth, scalar_t* image
829
- ) {
830
- const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
831
- if (n >= n_alive) return;
832
-
833
- const int index = rays_alive[n]; // ray id
834
-
835
- // locate
836
- sigmas += n * n_step;
837
- rgbs += n * n_step * 3;
838
- deltas += n * n_step * 2;
839
-
840
- rays_t += index;
841
- weights_sum += index;
842
- depth += index;
843
- image += index * 3;
844
-
845
- scalar_t t = rays_t[0]; // current ray's t
846
-
847
- scalar_t weight_sum = weights_sum[0];
848
- scalar_t d = depth[0];
849
- scalar_t r = image[0];
850
- scalar_t g = image[1];
851
- scalar_t b = image[2];
852
-
853
- // accumulate
854
- uint32_t step = 0;
855
- while (step < n_step) {
856
-
857
- // ray is terminated if delta == 0
858
- if (deltas[0] == 0) break;
859
-
860
- const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]);
861
-
862
- /*
863
- T_0 = 1; T_i = \prod_{j=0}^{i-1} (1 - alpha_j)
864
- w_i = alpha_i * T_i
865
- -->
866
- T_i = 1 - \sum_{j=0}^{i-1} w_j
867
- */
868
- const scalar_t T = 1 - weight_sum;
869
- const scalar_t weight = alpha * T;
870
- weight_sum += weight;
871
-
872
- t += deltas[1]; // real delta
873
- d += weight * t;
874
- r += weight * rgbs[0];
875
- g += weight * rgbs[1];
876
- b += weight * rgbs[2];
877
-
878
- //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d);
879
-
880
- // ray is terminated if T is too small
881
- // use a larger bound to further accelerate inference
882
- if (T < T_thresh) break;
883
-
884
- // locate
885
- sigmas++;
886
- rgbs += 3;
887
- deltas += 2;
888
- step++;
889
- }
890
-
891
- //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d);
892
-
893
- // rays_alive = -1 means ray is terminated early.
894
- if (step < n_step) {
895
- rays_alive[n] = -1;
896
- } else {
897
- rays_t[0] = t;
898
- }
899
-
900
- weights_sum[0] = weight_sum; // this is the thing I needed!
901
- depth[0] = d;
902
- image[0] = r;
903
- image[1] = g;
904
- image[2] = b;
905
- }
906
-
907
-
908
- void composite_rays(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor weights, at::Tensor depth, at::Tensor image) {
909
- static constexpr uint32_t N_THREAD = 128;
910
- AT_DISPATCH_FLOATING_TYPES_AND_HALF(
911
- image.scalar_type(), "composite_rays", ([&] {
912
- kernel_composite_rays<<<div_round_up(n_alive, N_THREAD), N_THREAD>>>(n_alive, n_step, T_thresh, rays_alive.data_ptr<int>(), rays_t.data_ptr<scalar_t>(), sigmas.data_ptr<scalar_t>(), rgbs.data_ptr<scalar_t>(), deltas.data_ptr<scalar_t>(), weights.data_ptr<scalar_t>(), depth.data_ptr<scalar_t>(), image.data_ptr<scalar_t>());
913
- }));
914
- }
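For reference, the per-ray accumulation performed by kernel_composite_rays_train_forward can be written in plain PyTorch. This is a minimal single-ray sketch (not part of the repository, and far slower than the CUDA kernel), assuming packed inputs for one ray: sigmas of shape [S], rgbs of shape [S, 3], and deltas of shape [S, 2], where deltas[:, 0] is the sampling step and deltas[:, 1] is the distance advanced since the previous kept sample.

```python
import torch

def composite_ray_reference(sigmas, rgbs, deltas, T_thresh=1e-4):
    # Mirrors kernel_composite_rays_train_forward for a single ray.
    T, t = 1.0, 0.0            # transmittance, accumulated ray distance
    ws, d = 0.0, 0.0           # weights_sum, expected depth
    rgb = torch.zeros(3)
    for s in range(sigmas.shape[0]):
        alpha = 1.0 - torch.exp(-sigmas[s] * deltas[s, 0])
        weight = alpha * T
        rgb = rgb + weight * rgbs[s]
        t = t + deltas[s, 1]   # "real delta": distance since the last kept sample
        d = d + weight * t
        ws = ws + weight
        T = T * (1.0 - alpha)
        if T < T_thresh:       # minimal remaining transmittance
            break
    return ws, d, rgb
```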
 
 
raymarching/src/raymarching.h DELETED
@@ -1,18 +0,0 @@
1
- #pragma once
2
-
3
- #include <stdint.h>
4
- #include <torch/torch.h>
5
-
6
-
7
- void near_far_from_aabb(const at::Tensor rays_o, const at::Tensor rays_d, const at::Tensor aabb, const uint32_t N, const float min_near, at::Tensor nears, at::Tensor fars);
8
- void sph_from_ray(const at::Tensor rays_o, const at::Tensor rays_d, const float radius, const uint32_t N, at::Tensor coords);
9
- void morton3D(const at::Tensor coords, const uint32_t N, at::Tensor indices);
10
- void morton3D_invert(const at::Tensor indices, const uint32_t N, at::Tensor coords);
11
- void packbits(const at::Tensor grid, const uint32_t N, const float density_thresh, at::Tensor bitfield);
12
-
13
- void march_rays_train(const at::Tensor rays_o, const at::Tensor rays_d, const at::Tensor grid, const float bound, const float dt_gamma, const uint32_t max_steps, const uint32_t N, const uint32_t C, const uint32_t H, const uint32_t M, const at::Tensor nears, const at::Tensor fars, at::Tensor xyzs, at::Tensor dirs, at::Tensor deltas, at::Tensor rays, at::Tensor counter, at::Tensor noises);
14
- void composite_rays_train_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor depth, at::Tensor image);
15
- void composite_rays_train_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs);
16
-
17
- void march_rays(const uint32_t n_alive, const uint32_t n_step, const at::Tensor rays_alive, const at::Tensor rays_t, const at::Tensor rays_o, const at::Tensor rays_d, const float bound, const float dt_gamma, const uint32_t max_steps, const uint32_t C, const uint32_t H, const at::Tensor grid, const at::Tensor nears, const at::Tensor fars, at::Tensor xyzs, at::Tensor dirs, at::Tensor deltas, at::Tensor noises);
18
- void composite_rays(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor weights_sum, at::Tensor depth, at::Tensor image);
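Assuming the extension declared in this header has been built and its Python-side wrappers match the call sites in renderer/ngp_renderer.py (which allocate the output tensors internally), a minimal sanity check of the ray/AABB intersection looks like the sketch below; the ray count and AABB values are placeholders.

```python
import torch
import torch.nn.functional as F
import raymarching  # python wrapper around the CUDA kernels declared in this header

rays_o = torch.zeros(1024, 3, device='cuda')                        # rays starting at the origin
rays_d = F.normalize(torch.randn(1024, 3, device='cuda'), dim=-1)   # random unit directions
aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device='cuda')  # (xmin, ymin, zmin, xmax, ymax, zmax)
nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, aabb, 0.2)  # min_near = 0.2
print(nears.shape, fars.shape)  # both (1024,)
```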
 
 
render_batch.py DELETED
@@ -1,20 +0,0 @@
1
- import subprocess
2
-
3
- from ldm.base_utils import save_pickle
4
-
5
- uids=['6f99fb8c2f1a4252b986ed5a765e1db9','8bba4678f9a349d6a29314ccf337975c','063b1b7d877a402ead76cedb06341681',
6
- '199b7a080622422fac8140b61cc7544a','83784b6f7a064212ab50aaaaeb1d7fa7','5501434a052c49d6a8a8d9a1120fee10',
7
- 'cca62f95635f4b20aea4f35014632a55','d2e8612a21044111a7176da2bd78de05','f9e172dd733644a2b47a824e202c89d5']
8
-
9
- # for uid in uids:
10
- # cmds = ['blender','--background','--python','blender_script.py','--',
11
- # '--object_path',f'objaverse_examples/{uid}/{uid}.glb',
12
- # '--output_dir','./training_examples/input','--camera_type','random']
13
- # subprocess.run(cmds)
14
- #
15
- # cmds = ['blender','--background','--python','blender_script.py','--',
16
- # '--object_path',f'objaverse_examples/{uid}/{uid}.glb',
17
- # '--output_dir','./training_examples/target','--camera_type','fixed']
18
- # subprocess.run(cmds)
19
-
20
- save_pickle(uids, f'training_examples/uid_set.pkl')
 
 
renderer/agg_net.py DELETED
@@ -1,83 +0,0 @@
1
- import torch.nn.functional as F
2
- import torch.nn as nn
3
- import torch
4
-
5
- def weights_init(m):
6
- if isinstance(m, nn.Linear):
7
- nn.init.kaiming_normal_(m.weight.data)
8
- if m.bias is not None:
9
- nn.init.zeros_(m.bias.data)
10
-
11
- class NeRF(nn.Module):
12
- def __init__(self, vol_n=8+8, feat_ch=8+16+32+3, hid_n=64):
13
- super(NeRF, self).__init__()
14
- self.hid_n = hid_n
15
- self.agg = Agg(feat_ch)
16
- self.lr0 = nn.Sequential(nn.Linear(vol_n+16, hid_n), nn.ReLU())
17
- self.sigma = nn.Sequential(nn.Linear(hid_n, 1), nn.Softplus())
18
- self.color = nn.Sequential(
19
- nn.Linear(16+vol_n+feat_ch+hid_n+4, hid_n), # agg_feats+vox_feat+img_feat+lr0_feats+dir
20
- nn.ReLU(),
21
- nn.Linear(hid_n, 1)
22
- )
23
- self.lr0.apply(weights_init)
24
- self.sigma.apply(weights_init)
25
- self.color.apply(weights_init)
26
-
27
- def forward(self, vox_feat, img_feat_rgb_dir, source_img_mask):
28
- # assert torch.sum(torch.sum(source_img_mask,1)<2)==0
29
- b, d, n, _ = img_feat_rgb_dir.shape # b,d,n,f=8+16+32+3+4
30
- agg_feat = self.agg(img_feat_rgb_dir, source_img_mask) # b,d,f=16
31
- x = self.lr0(torch.cat((vox_feat, agg_feat), dim=-1)) # b,d,f=64
32
- sigma = self.sigma(x) # b,d,1
33
-
34
- x = torch.cat((x, vox_feat, agg_feat), dim=-1) # b,d,f=16+16+64
35
- x = x.view(b, d, 1, x.shape[-1]).repeat(1, 1, n, 1)
36
- x = torch.cat((x, img_feat_rgb_dir), dim=-1)
37
- logits = self.color(x)
38
- source_img_mask_ = source_img_mask.reshape(b, 1, n, 1).repeat(1, logits.shape[1], 1, 1) == 0
39
- logits[source_img_mask_] = -1e7
40
- color_weight = F.softmax(logits, dim=-2)
41
- color = torch.sum((img_feat_rgb_dir[..., -7:-4] * color_weight), dim=-2)
42
- return color, sigma
43
-
44
- class Agg(nn.Module):
45
- def __init__(self, feat_ch):
46
- super(Agg, self).__init__()
47
- self.feat_ch = feat_ch
48
- self.view_fc = nn.Sequential(nn.Linear(4, feat_ch), nn.ReLU())
49
- self.view_fc.apply(weights_init)
50
- self.global_fc = nn.Sequential(nn.Linear(feat_ch*3, 32), nn.ReLU())
51
-
52
- self.agg_w_fc = nn.Linear(32, 1)
53
- self.fc = nn.Linear(32, 16)
54
- self.global_fc.apply(weights_init)
55
- self.agg_w_fc.apply(weights_init)
56
- self.fc.apply(weights_init)
57
-
58
- def masked_mean_var(self, img_feat_rgb, source_img_mask):
59
- # img_feat_rgb: b,d,n,f source_img_mask: b,n
60
- b, n = source_img_mask.shape
61
- source_img_mask = source_img_mask.view(b, 1, n, 1)
62
- mean = torch.sum(source_img_mask * img_feat_rgb, dim=-2)/ (torch.sum(source_img_mask, dim=-2) + 1e-5)
63
- var = torch.sum((img_feat_rgb - mean.unsqueeze(-2)) ** 2 * source_img_mask, dim=-2) / (torch.sum(source_img_mask, dim=-2) + 1e-5)
64
- return mean, var
65
-
66
- def forward(self, img_feat_rgb_dir, source_img_mask):
67
- # img_feat_rgb_dir b,d,n,f
68
- b, d, n, _ = img_feat_rgb_dir.shape
69
- view_feat = self.view_fc(img_feat_rgb_dir[..., -4:]) # b,d,n,f-4
70
- img_feat_rgb = img_feat_rgb_dir[..., :-4] + view_feat
71
-
72
- mean_feat, var_feat = self.masked_mean_var(img_feat_rgb, source_img_mask)
73
- var_feat = var_feat.view(b, -1, 1, self.feat_ch).repeat(1, 1, n, 1)
74
- avg_feat = mean_feat.view(b, -1, 1, self.feat_ch).repeat(1, 1, n, 1)
75
-
76
- feat = torch.cat([img_feat_rgb, var_feat, avg_feat], dim=-1) # b,d,n,f
77
- global_feat = self.global_fc(feat) # b,d,n,f
78
- logits = self.agg_w_fc(global_feat) # b,d,n,1
79
- source_img_mask_ = source_img_mask.reshape(b, 1, n, 1).repeat(1, logits.shape[1], 1, 1) == 0
80
- logits[source_img_mask_] = -1e7
81
- agg_w = F.softmax(logits, dim=-2)
82
- im_feat = (global_feat * agg_w).sum(dim=-2)
83
- return self.fc(im_feat)
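A minimal shape-check sketch for the aggregation NeRF head above, assuming the default constructor arguments (vol_n=16 and feat_ch=8+16+32+3=59, so each per-view feature vector carries 59 channels plus RGB and a 4-d view-direction encoding already concatenated into the last 7 channels). The random tensors are placeholders, not real features.

```python
import torch
from renderer.agg_net import NeRF

net = NeRF()                                     # vol_n=16, feat_ch=59, hid_n=64
b, d, n = 2, 64, 8                               # rays, samples per ray, source views
vox_feat = torch.randn(b, d, 16)                 # volume features per sample
img_feat_rgb_dir = torch.randn(b, d, n, 59 + 4)  # per-view features (incl. rgb) + dir encoding
mask = torch.ones(b, n)                          # all source views valid
color, sigma = net(vox_feat, img_feat_rgb_dir, mask)
print(color.shape, sigma.shape)                  # (2, 64, 3) (2, 64, 1)
```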
 
 
renderer/cost_reg_net.py DELETED
@@ -1,95 +0,0 @@
1
- import torch.nn as nn
2
-
3
- class ConvBnReLU3D(nn.Module):
4
- def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, pad=1, norm_act=nn.BatchNorm3d):
5
- super(ConvBnReLU3D, self).__init__()
6
- self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride=stride, padding=pad, bias=False)
7
- self.bn = norm_act(out_channels)
8
- self.relu = nn.ReLU(inplace=True)
9
-
10
- def forward(self, x):
11
- return self.relu(self.bn(self.conv(x)))
12
-
13
- class CostRegNet(nn.Module):
14
- def __init__(self, in_channels, norm_act=nn.BatchNorm3d):
15
- super(CostRegNet, self).__init__()
16
- self.conv0 = ConvBnReLU3D(in_channels, 8, norm_act=norm_act)
17
-
18
- self.conv1 = ConvBnReLU3D(8, 16, stride=2, norm_act=norm_act)
19
- self.conv2 = ConvBnReLU3D(16, 16, norm_act=norm_act)
20
-
21
- self.conv3 = ConvBnReLU3D(16, 32, stride=2, norm_act=norm_act)
22
- self.conv4 = ConvBnReLU3D(32, 32, norm_act=norm_act)
23
-
24
- self.conv5 = ConvBnReLU3D(32, 64, stride=2, norm_act=norm_act)
25
- self.conv6 = ConvBnReLU3D(64, 64, norm_act=norm_act)
26
-
27
- self.conv7 = nn.Sequential(
28
- nn.ConvTranspose3d(64, 32, 3, padding=1, output_padding=1, stride=2, bias=False),
29
- norm_act(32)
30
- )
31
-
32
- self.conv9 = nn.Sequential(
33
- nn.ConvTranspose3d(32, 16, 3, padding=1, output_padding=1, stride=2, bias=False),
34
- norm_act(16)
35
- )
36
-
37
- self.conv11 = nn.Sequential(
38
- nn.ConvTranspose3d(16, 8, 3, padding=1, output_padding=1,stride=2, bias=False),
39
- norm_act(8)
40
- )
41
- self.depth_conv = nn.Sequential(nn.Conv3d(8, 1, 3, padding=1, bias=False))
42
- self.feat_conv = nn.Sequential(nn.Conv3d(8, 8, 3, padding=1, bias=False))
43
-
44
- def forward(self, x):
45
- conv0 = self.conv0(x)
46
- conv2 = self.conv2(self.conv1(conv0))
47
- conv4 = self.conv4(self.conv3(conv2))
48
- x = self.conv6(self.conv5(conv4))
49
- x = conv4 + self.conv7(x)
50
- del conv4
51
- x = conv2 + self.conv9(x)
52
- del conv2
53
- x = conv0 + self.conv11(x)
54
- del conv0
55
- feat = self.feat_conv(x)
56
- depth = self.depth_conv(x)
57
- return feat, depth
58
-
59
-
60
- class MinCostRegNet(nn.Module):
61
- def __init__(self, in_channels, norm_act=nn.BatchNorm3d):
62
- super(MinCostRegNet, self).__init__()
63
- self.conv0 = ConvBnReLU3D(in_channels, 8, norm_act=norm_act)
64
-
65
- self.conv1 = ConvBnReLU3D(8, 16, stride=2, norm_act=norm_act)
66
- self.conv2 = ConvBnReLU3D(16, 16, norm_act=norm_act)
67
-
68
- self.conv3 = ConvBnReLU3D(16, 32, stride=2, norm_act=norm_act)
69
- self.conv4 = ConvBnReLU3D(32, 32, norm_act=norm_act)
70
-
71
- self.conv9 = nn.Sequential(
72
- nn.ConvTranspose3d(32, 16, 3, padding=1, output_padding=1,
73
- stride=2, bias=False),
74
- norm_act(16))
75
-
76
- self.conv11 = nn.Sequential(
77
- nn.ConvTranspose3d(16, 8, 3, padding=1, output_padding=1,
78
- stride=2, bias=False),
79
- norm_act(8))
80
-
81
- self.depth_conv = nn.Sequential(nn.Conv3d(8, 1, 3, padding=1, bias=False))
82
- self.feat_conv = nn.Sequential(nn.Conv3d(8, 8, 3, padding=1, bias=False))
83
-
84
- def forward(self, x):
85
- conv0 = self.conv0(x)
86
- conv2 = self.conv2(self.conv1(conv0))
87
- conv4 = self.conv4(self.conv3(conv2))
88
- x = conv4
89
- x = conv2 + self.conv9(x)
90
- del conv2
91
- x = conv0 + self.conv11(x)
92
- del conv0
93
- feat = self.feat_conv(x)
94
- depth = self.depth_conv(x)
95
- return feat, depth
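A quick shape check for the 3D cost-volume regularizers above; the 32-channel input and the 32^3 volume are arbitrary placeholders (the spatial size only needs to be divisible by 8 for CostRegNet and by 4 for MinCostRegNet, because of the stride-2 downsampling stages).

```python
import torch
from renderer.cost_reg_net import CostRegNet, MinCostRegNet

x = torch.randn(1, 32, 32, 32, 32)      # (B, C, D, H, W)
feat, depth = CostRegNet(in_channels=32)(x)
print(feat.shape, depth.shape)          # (1, 8, 32, 32, 32) (1, 1, 32, 32, 32)
feat, depth = MinCostRegNet(in_channels=32)(x)
print(feat.shape, depth.shape)          # same output shapes, lighter network
```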
 
 
renderer/dummy_dataset.py DELETED
@@ -1,40 +0,0 @@
1
- import pytorch_lightning as pl
2
- from torch.utils.data import Dataset
3
- import webdataset as wds
4
- from torch.utils.data.distributed import DistributedSampler
5
- class DummyDataset(pl.LightningDataModule):
6
- def __init__(self,seed):
7
- super().__init__()
8
-
9
- def setup(self, stage):
10
- if stage in ['fit']:
11
- self.train_dataset = DummyData(True)
12
- self.val_dataset = DummyData(False)
13
- else:
14
- raise NotImplementedError
15
-
16
- def train_dataloader(self):
17
- return wds.WebLoader(self.train_dataset, batch_size=1, num_workers=0, shuffle=False)
18
-
19
- def val_dataloader(self):
20
- return wds.WebLoader(self.val_dataset, batch_size=1, num_workers=0, shuffle=False)
21
-
22
- def test_dataloader(self):
23
- return wds.WebLoader(DummyData(False))
24
-
25
- class DummyData(Dataset):
26
- def __init__(self,is_train):
27
- self.is_train=is_train
28
-
29
- def __len__(self):
30
- if self.is_train:
31
- return 99999999
32
- else:
33
- return 1
34
-
35
- def __getitem__(self, index):
36
- return {}
37
-
38
-
39
-
40
-
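The data module above only exists to satisfy PyTorch Lightning's training loop: the batches it yields are empty dicts and the real inputs are produced inside the model. A minimal sketch of driving it directly (assuming pytorch_lightning and webdataset are installed; the exact Trainer wiring lives elsewhere in the repo):

```python
from renderer.dummy_dataset import DummyDataset

dm = DummyDataset(seed=0)
dm.setup('fit')
loader = dm.train_dataloader()
batch = next(iter(loader))   # {}: placeholder batch, the model generates its own data
```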
 
 
renderer/feature_net.py DELETED
@@ -1,42 +0,0 @@
1
- import torch.nn as nn
2
- import torch.nn.functional as F
3
-
4
- class ConvBnReLU(nn.Module):
5
- def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, pad=1, norm_act=nn.BatchNorm2d):
6
- super(ConvBnReLU, self).__init__()
7
- self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=pad, bias=False)
8
- self.bn = norm_act(out_channels)
9
- self.relu = nn.ReLU(inplace=True)
10
-
11
- def forward(self, x):
12
- return self.relu(self.bn(self.conv(x)))
13
-
14
- class FeatureNet(nn.Module):
15
- def __init__(self, norm_act=nn.BatchNorm2d):
16
- super(FeatureNet, self).__init__()
17
- self.conv0 = nn.Sequential(ConvBnReLU(3, 8, 3, 1, 1, norm_act=norm_act), ConvBnReLU(8, 8, 3, 1, 1, norm_act=norm_act))
18
- self.conv1 = nn.Sequential(ConvBnReLU(8, 16, 5, 2, 2, norm_act=norm_act), ConvBnReLU(16, 16, 3, 1, 1, norm_act=norm_act))
19
- self.conv2 = nn.Sequential(ConvBnReLU(16, 32, 5, 2, 2, norm_act=norm_act), ConvBnReLU(32, 32, 3, 1, 1, norm_act=norm_act))
20
-
21
- self.toplayer = nn.Conv2d(32, 32, 1)
22
- self.lat1 = nn.Conv2d(16, 32, 1)
23
- self.lat0 = nn.Conv2d(8, 32, 1)
24
-
25
- self.smooth1 = nn.Conv2d(32, 16, 3, padding=1)
26
- self.smooth0 = nn.Conv2d(32, 8, 3, padding=1)
27
-
28
- def _upsample_add(self, x, y):
29
- return F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True) + y
30
-
31
- def forward(self, x):
32
- conv0 = self.conv0(x)
33
- conv1 = self.conv1(conv0)
34
- conv2 = self.conv2(conv1)
35
- feat2 = self.toplayer(conv2)
36
- feat1 = self._upsample_add(feat2, self.lat1(conv1))
37
- feat0 = self._upsample_add(feat1, self.lat0(conv0))
38
- feat1 = self.smooth1(feat1)
39
- feat0 = self.smooth0(feat0)
40
- return feat2, feat1, feat0
41
-
42
-
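A shape check for the small FPN-style feature extractor above; the input height and width only need to be divisible by 4, and the 256x256 size below is just an example.

```python
import torch
from renderer.feature_net import FeatureNet

net = FeatureNet()
x = torch.randn(1, 3, 256, 256)          # (B, 3, H, W)
feat2, feat1, feat0 = net(x)
print(feat2.shape, feat1.shape, feat0.shape)
# (1, 32, 64, 64) (1, 16, 128, 128) (1, 8, 256, 256)
```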
 
 
renderer/neus_networks.py DELETED
@@ -1,503 +0,0 @@
1
- import math
2
-
3
- import numpy as np
4
- import torch
5
- import torch.nn as nn
6
- import torch.nn.functional as F
7
- import tinycudann as tcnn
8
-
9
- # Positional encoding embedding. Code was taken from https://github.com/bmild/nerf.
10
- class Embedder:
11
- def __init__(self, **kwargs):
12
- self.kwargs = kwargs
13
- self.create_embedding_fn()
14
-
15
- def create_embedding_fn(self):
16
- embed_fns = []
17
- d = self.kwargs['input_dims']
18
- out_dim = 0
19
- if self.kwargs['include_input']:
20
- embed_fns.append(lambda x: x)
21
- out_dim += d
22
-
23
- max_freq = self.kwargs['max_freq_log2']
24
- N_freqs = self.kwargs['num_freqs']
25
-
26
- if self.kwargs['log_sampling']:
27
- freq_bands = 2. ** torch.linspace(0., max_freq, N_freqs)
28
- else:
29
- freq_bands = torch.linspace(2. ** 0., 2. ** max_freq, N_freqs)
30
-
31
- for freq in freq_bands:
32
- for p_fn in self.kwargs['periodic_fns']:
33
- embed_fns.append(lambda x, p_fn=p_fn, freq=freq: p_fn(x * freq))
34
- out_dim += d
35
-
36
- self.embed_fns = embed_fns
37
- self.out_dim = out_dim
38
-
39
- def embed(self, inputs):
40
- return torch.cat([fn(inputs) for fn in self.embed_fns], -1)
41
-
42
-
43
- def get_embedder(multires, input_dims=3):
44
- embed_kwargs = {
45
- 'include_input': True,
46
- 'input_dims': input_dims,
47
- 'max_freq_log2': multires - 1,
48
- 'num_freqs': multires,
49
- 'log_sampling': True,
50
- 'periodic_fns': [torch.sin, torch.cos],
51
- }
52
-
53
- embedder_obj = Embedder(**embed_kwargs)
54
-
55
- def embed(x, eo=embedder_obj): return eo.embed(x)
56
-
57
- return embed, embedder_obj.out_dim
58
-
59
-
60
- class SDFNetwork(nn.Module):
61
- def __init__(self, d_in, d_out, d_hidden, n_layers, skip_in=(4,), multires=0, bias=0.5,
62
- scale=1, geometric_init=True, weight_norm=True, inside_outside=False):
63
- super(SDFNetwork, self).__init__()
64
-
65
- dims = [d_in] + [d_hidden for _ in range(n_layers)] + [d_out]
66
-
67
- self.embed_fn_fine = None
68
-
69
- if multires > 0:
70
- embed_fn, input_ch = get_embedder(multires, input_dims=d_in)
71
- self.embed_fn_fine = embed_fn
72
- dims[0] = input_ch
73
-
74
- self.num_layers = len(dims)
75
- self.skip_in = skip_in
76
- self.scale = scale
77
-
78
- for l in range(0, self.num_layers - 1):
79
- if l + 1 in self.skip_in:
80
- out_dim = dims[l + 1] - dims[0]
81
- else:
82
- out_dim = dims[l + 1]
83
-
84
- lin = nn.Linear(dims[l], out_dim)
85
-
86
- if geometric_init:
87
- if l == self.num_layers - 2:
88
- if not inside_outside:
89
- torch.nn.init.normal_(lin.weight, mean=np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001)
90
- torch.nn.init.constant_(lin.bias, -bias)
91
- else:
92
- torch.nn.init.normal_(lin.weight, mean=-np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001)
93
- torch.nn.init.constant_(lin.bias, bias)
94
- elif multires > 0 and l == 0:
95
- torch.nn.init.constant_(lin.bias, 0.0)
96
- torch.nn.init.constant_(lin.weight[:, 3:], 0.0)
97
- torch.nn.init.normal_(lin.weight[:, :3], 0.0, np.sqrt(2) / np.sqrt(out_dim))
98
- elif multires > 0 and l in self.skip_in:
99
- torch.nn.init.constant_(lin.bias, 0.0)
100
- torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim))
101
- torch.nn.init.constant_(lin.weight[:, -(dims[0] - 3):], 0.0)
102
- else:
103
- torch.nn.init.constant_(lin.bias, 0.0)
104
- torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim))
105
-
106
- if weight_norm:
107
- lin = nn.utils.weight_norm(lin)
108
-
109
- setattr(self, "lin" + str(l), lin)
110
-
111
- self.activation = nn.Softplus(beta=100)
112
-
113
- def forward(self, inputs):
114
- inputs = inputs * self.scale
115
- if self.embed_fn_fine is not None:
116
- inputs = self.embed_fn_fine(inputs)
117
-
118
- x = inputs
119
- for l in range(0, self.num_layers - 1):
120
- lin = getattr(self, "lin" + str(l))
121
-
122
- if l in self.skip_in:
123
- x = torch.cat([x, inputs], -1) / np.sqrt(2)
124
-
125
- x = lin(x)
126
-
127
- if l < self.num_layers - 2:
128
- x = self.activation(x)
129
-
130
- return x
131
-
132
- def sdf(self, x):
133
- return self.forward(x)[..., :1]
134
-
135
- def sdf_hidden_appearance(self, x):
136
- return self.forward(x)
137
-
138
- def gradient(self, x):
139
- x.requires_grad_(True)
140
- with torch.enable_grad():
141
- y = self.sdf(x)
142
- d_output = torch.ones_like(y, requires_grad=False, device=y.device)
143
- gradients = torch.autograd.grad(
144
- outputs=y,
145
- inputs=x,
146
- grad_outputs=d_output,
147
- create_graph=True,
148
- retain_graph=True,
149
- only_inputs=True)[0]
150
- return gradients
151
-
152
- def sdf_normal(self, x):
153
- x.requires_grad_(True)
154
- with torch.enable_grad():
155
- y = self.sdf(x)
156
- d_output = torch.ones_like(y, requires_grad=False, device=y.device)
157
- gradients = torch.autograd.grad(
158
- outputs=y,
159
- inputs=x,
160
- grad_outputs=d_output,
161
- create_graph=True,
162
- retain_graph=True,
163
- only_inputs=True)[0]
164
- return y[..., :1].detach(), gradients.detach()
165
-
166
- class SDFNetworkWithFeature(nn.Module):
167
- def __init__(self, cube, dp_in, df_in, d_out, d_hidden, n_layers, skip_in=(4,), multires=0, bias=0.5,
168
- scale=1, geometric_init=True, weight_norm=True, inside_outside=False, cube_length=0.5):
169
- super().__init__()
170
-
171
- self.register_buffer("cube", cube)
172
- self.cube_length = cube_length
173
- dims = [dp_in+df_in] + [d_hidden for _ in range(n_layers)] + [d_out]
174
-
175
- self.embed_fn_fine = None
176
-
177
- if multires > 0:
178
- embed_fn, input_ch = get_embedder(multires, input_dims=dp_in)
179
- self.embed_fn_fine = embed_fn
180
- dims[0] = input_ch + df_in
181
-
182
- self.num_layers = len(dims)
183
- self.skip_in = skip_in
184
- self.scale = scale
185
-
186
- for l in range(0, self.num_layers - 1):
187
- if l + 1 in self.skip_in:
188
- out_dim = dims[l + 1] - dims[0]
189
- else:
190
- out_dim = dims[l + 1]
191
-
192
- lin = nn.Linear(dims[l], out_dim)
193
-
194
- if geometric_init:
195
- if l == self.num_layers - 2:
196
- if not inside_outside:
197
- torch.nn.init.normal_(lin.weight, mean=np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001)
198
- torch.nn.init.constant_(lin.bias, -bias)
199
- else:
200
- torch.nn.init.normal_(lin.weight, mean=-np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001)
201
- torch.nn.init.constant_(lin.bias, bias)
202
- elif multires > 0 and l == 0:
203
- torch.nn.init.constant_(lin.bias, 0.0)
204
- torch.nn.init.constant_(lin.weight[:, 3:], 0.0)
205
- torch.nn.init.normal_(lin.weight[:, :3], 0.0, np.sqrt(2) / np.sqrt(out_dim))
206
- elif multires > 0 and l in self.skip_in:
207
- torch.nn.init.constant_(lin.bias, 0.0)
208
- torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim))
209
- torch.nn.init.constant_(lin.weight[:, -(dims[0] - 3):], 0.0)
210
- else:
211
- torch.nn.init.constant_(lin.bias, 0.0)
212
- torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim))
213
-
214
- if weight_norm:
215
- lin = nn.utils.weight_norm(lin)
216
-
217
- setattr(self, "lin" + str(l), lin)
218
-
219
- self.activation = nn.Softplus(beta=100)
220
-
221
- def forward(self, points):
222
- points = points * self.scale
223
-
224
-         # note: dividing by cube_length (0.5) scales points by 2, since the feature cube spans [-0.5, 0.5]
225
- with torch.no_grad():
226
- feats = F.grid_sample(self.cube, points.view(1,-1,1,1,3)/self.cube_length, mode='bilinear', align_corners=True, padding_mode='zeros').detach()
227
- feats = feats.view(self.cube.shape[1], -1).permute(1,0).view(*points.shape[:-1], -1)
228
- if self.embed_fn_fine is not None:
229
- points = self.embed_fn_fine(points)
230
-
231
- x = torch.cat([points, feats], -1)
232
- for l in range(0, self.num_layers - 1):
233
- lin = getattr(self, "lin" + str(l))
234
-
235
- if l in self.skip_in:
236
- x = torch.cat([x, points, feats], -1) / np.sqrt(2)
237
-
238
- x = lin(x)
239
-
240
- if l < self.num_layers - 2:
241
- x = self.activation(x)
242
-
243
- # concat feats
244
- x = torch.cat([x, feats], -1)
245
- return x
246
-
247
- def sdf(self, x):
248
- return self.forward(x)[..., :1]
249
-
250
- def sdf_hidden_appearance(self, x):
251
- return self.forward(x)
252
-
253
- def gradient(self, x):
254
- x.requires_grad_(True)
255
- with torch.enable_grad():
256
- y = self.sdf(x)
257
- d_output = torch.ones_like(y, requires_grad=False, device=y.device)
258
- gradients = torch.autograd.grad(
259
- outputs=y,
260
- inputs=x,
261
- grad_outputs=d_output,
262
- create_graph=True,
263
- retain_graph=True,
264
- only_inputs=True)[0]
265
- return gradients
266
-
267
- def sdf_normal(self, x):
268
- x.requires_grad_(True)
269
- with torch.enable_grad():
270
- y = self.sdf(x)
271
- d_output = torch.ones_like(y, requires_grad=False, device=y.device)
272
- gradients = torch.autograd.grad(
273
- outputs=y,
274
- inputs=x,
275
- grad_outputs=d_output,
276
- create_graph=True,
277
- retain_graph=True,
278
- only_inputs=True)[0]
279
- return y[..., :1].detach(), gradients.detach()
280
-
281
-
282
- class VanillaMLP(nn.Module):
283
- def __init__(self, dim_in, dim_out, n_neurons, n_hidden_layers):
284
- super().__init__()
285
- self.n_neurons, self.n_hidden_layers = n_neurons, n_hidden_layers
286
- self.sphere_init, self.weight_norm = True, True
287
- self.sphere_init_radius = 0.5
288
- self.layers = [self.make_linear(dim_in, self.n_neurons, is_first=True, is_last=False), self.make_activation()]
289
- for i in range(self.n_hidden_layers - 1):
290
- self.layers += [self.make_linear(self.n_neurons, self.n_neurons, is_first=False, is_last=False), self.make_activation()]
291
- self.layers += [self.make_linear(self.n_neurons, dim_out, is_first=False, is_last=True)]
292
- self.layers = nn.Sequential(*self.layers)
293
-
294
- @torch.cuda.amp.autocast(False)
295
- def forward(self, x):
296
- x = self.layers(x.float())
297
- return x
298
-
299
- def make_linear(self, dim_in, dim_out, is_first, is_last):
300
- layer = nn.Linear(dim_in, dim_out, bias=True) # network without bias will degrade quality
301
- if self.sphere_init:
302
- if is_last:
303
- torch.nn.init.constant_(layer.bias, -self.sphere_init_radius)
304
- torch.nn.init.normal_(layer.weight, mean=math.sqrt(math.pi) / math.sqrt(dim_in), std=0.0001)
305
- elif is_first:
306
- torch.nn.init.constant_(layer.bias, 0.0)
307
- torch.nn.init.constant_(layer.weight[:, 3:], 0.0)
308
- torch.nn.init.normal_(layer.weight[:, :3], 0.0, math.sqrt(2) / math.sqrt(dim_out))
309
- else:
310
- torch.nn.init.constant_(layer.bias, 0.0)
311
- torch.nn.init.normal_(layer.weight, 0.0, math.sqrt(2) / math.sqrt(dim_out))
312
- else:
313
- torch.nn.init.constant_(layer.bias, 0.0)
314
- torch.nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')
315
-
316
- if self.weight_norm:
317
- layer = nn.utils.weight_norm(layer)
318
- return layer
319
-
320
- def make_activation(self):
321
- if self.sphere_init:
322
- return nn.Softplus(beta=100)
323
- else:
324
- return nn.ReLU(inplace=True)
325
-
326
-
327
- class SDFHashGridNetwork(nn.Module):
328
- def __init__(self, bound=0.5, feats_dim=13):
329
- super().__init__()
330
- self.bound = bound
331
- # max_resolution = 32
332
- # base_resolution = 16
333
- # n_levels = 4
334
- # log2_hashmap_size = 16
335
- # n_features_per_level = 8
336
- max_resolution = 2048
337
- base_resolution = 16
338
- n_levels = 16
339
- log2_hashmap_size = 19
340
- n_features_per_level = 2
341
-
342
- # max_res = base_res * t^(k-1)
343
- per_level_scale = (max_resolution / base_resolution)** (1 / (n_levels - 1))
344
-
345
- self.encoder = tcnn.Encoding(
346
- n_input_dims=3,
347
- encoding_config={
348
- "otype": "HashGrid",
349
- "n_levels": n_levels,
350
- "n_features_per_level": n_features_per_level,
351
- "log2_hashmap_size": log2_hashmap_size,
352
- "base_resolution": base_resolution,
353
- "per_level_scale": per_level_scale,
354
- },
355
- )
356
- self.sdf_mlp = VanillaMLP(n_levels*n_features_per_level+3,feats_dim,64,1)
357
-
358
- def forward(self, x):
359
- shape = x.shape[:-1]
360
- x = x.reshape(-1, 3)
361
- x_ = (x + self.bound) / (2 * self.bound)
362
- feats = self.encoder(x_)
363
- feats = torch.cat([x, feats], 1)
364
-
365
- feats = self.sdf_mlp(feats)
366
- feats = feats.reshape(*shape,-1)
367
- return feats
368
-
369
- def sdf(self, x):
370
- return self(x)[...,:1]
371
-
372
- def gradient(self, x):
373
- x.requires_grad_(True)
374
- with torch.enable_grad():
375
- y = self.sdf(x)
376
- d_output = torch.ones_like(y, requires_grad=False, device=y.device)
377
- gradients = torch.autograd.grad(
378
- outputs=y,
379
- inputs=x,
380
- grad_outputs=d_output,
381
- create_graph=True,
382
- retain_graph=True,
383
- only_inputs=True)[0]
384
- return gradients
385
-
386
- def sdf_normal(self, x):
387
- x.requires_grad_(True)
388
- with torch.enable_grad():
389
- y = self.sdf(x)
390
- d_output = torch.ones_like(y, requires_grad=False, device=y.device)
391
- gradients = torch.autograd.grad(
392
- outputs=y,
393
- inputs=x,
394
- grad_outputs=d_output,
395
- create_graph=True,
396
- retain_graph=True,
397
- only_inputs=True)[0]
398
- return y[..., :1].detach(), gradients.detach()
399
-
400
- class RenderingFFNetwork(nn.Module):
401
- def __init__(self, in_feats_dim=12):
402
- super().__init__()
403
- self.dir_encoder = tcnn.Encoding(
404
- n_input_dims=3,
405
- encoding_config={
406
- "otype": "SphericalHarmonics",
407
- "degree": 4,
408
- },
409
- )
410
- self.color_mlp = tcnn.Network(
411
- n_input_dims = in_feats_dim + 3 + self.dir_encoder.n_output_dims,
412
- n_output_dims = 3,
413
- network_config={
414
- "otype": "FullyFusedMLP",
415
- "activation": "ReLU",
416
- "output_activation": "none",
417
- "n_neurons": 64,
418
- "n_hidden_layers": 2,
419
- },
420
- )
421
-
422
- def forward(self, points, normals, view_dirs, feature_vectors):
423
- normals = F.normalize(normals, dim=-1)
424
- view_dirs = F.normalize(view_dirs, dim=-1)
425
- reflective = torch.sum(view_dirs * normals, -1, keepdim=True) * normals * 2 - view_dirs
426
-
427
- x = torch.cat([feature_vectors, normals, self.dir_encoder(reflective)], -1)
428
- colors = self.color_mlp(x).float()
429
- colors = F.sigmoid(colors)
430
- return colors
431
-
432
- # This implementation is borrowed from IDR: https://github.com/lioryariv/idr
433
- class RenderingNetwork(nn.Module):
434
- def __init__(self, d_feature, d_in, d_out, d_hidden,
435
- n_layers, weight_norm=True, multires_view=0, squeeze_out=True, use_view_dir=True):
436
- super().__init__()
437
-
438
- self.squeeze_out = squeeze_out
439
- self.rgb_act=F.sigmoid
440
- self.use_view_dir=use_view_dir
441
-
442
- dims = [d_in + d_feature] + [d_hidden for _ in range(n_layers)] + [d_out]
443
-
444
- self.embedview_fn = None
445
- if multires_view > 0:
446
- embedview_fn, input_ch = get_embedder(multires_view)
447
- self.embedview_fn = embedview_fn
448
- dims[0] += (input_ch - 3)
449
-
450
- self.num_layers = len(dims)
451
-
452
- for l in range(0, self.num_layers - 1):
453
- out_dim = dims[l + 1]
454
- lin = nn.Linear(dims[l], out_dim)
455
-
456
- if weight_norm:
457
- lin = nn.utils.weight_norm(lin)
458
-
459
- setattr(self, "lin" + str(l), lin)
460
-
461
- self.relu = nn.ReLU()
462
-
463
- def forward(self, points, normals, view_dirs, feature_vectors):
464
- if self.use_view_dir:
465
- view_dirs = F.normalize(view_dirs, dim=-1)
466
- normals = F.normalize(normals, dim=-1)
467
- reflective = torch.sum(view_dirs*normals, -1, keepdim=True) * normals * 2 - view_dirs
468
- if self.embedview_fn is not None: reflective = self.embedview_fn(reflective)
469
- rendering_input = torch.cat([points, reflective, normals, feature_vectors], dim=-1)
470
- else:
471
- rendering_input = torch.cat([points, normals, feature_vectors], dim=-1)
472
-
473
- x = rendering_input
474
-
475
- for l in range(0, self.num_layers - 1):
476
- lin = getattr(self, "lin" + str(l))
477
-
478
- x = lin(x)
479
-
480
- if l < self.num_layers - 2:
481
- x = self.relu(x)
482
-
483
- if self.squeeze_out:
484
- x = self.rgb_act(x)
485
- return x
486
-
487
-
488
- class SingleVarianceNetwork(nn.Module):
489
- def __init__(self, init_val, activation='exp'):
490
- super(SingleVarianceNetwork, self).__init__()
491
- self.act = activation
492
- self.register_parameter('variance', nn.Parameter(torch.tensor(init_val)))
493
-
494
- def forward(self, x):
495
- device = x.device
496
- if self.act=='exp':
497
- return torch.ones([*x.shape[:-1], 1], dtype=torch.float32, device=device) * torch.exp(self.variance * 10.0)
498
- else:
499
- raise NotImplementedError
500
-
501
- def warp(self, x, inv_s):
502
- device = x.device
503
- return torch.ones([*x.shape[:-1], 1], dtype=torch.float32, device=device) * inv_s
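A small usage sketch for the SDF MLP defined above. Note that this module imports tinycudann at the top, so that package must be importable even though SDFNetwork itself does not use it; the constructor arguments below are illustrative choices, not values mandated by the repo.

```python
import torch
from renderer.neus_networks import SDFNetwork  # requires tinycudann to be importable

sdf_net = SDFNetwork(d_in=3, d_out=257, d_hidden=256, n_layers=8, multires=6)
pts = 0.3 * torch.randn(1024, 3)             # query points inside the unit cube
out = sdf_net(pts)                           # (1024, 257): SDF value + geometry feature
sdf = sdf_net.sdf(pts)                       # (1024, 1)
sdf_val, normals = sdf_net.sdf_normal(pts)   # detached SDF and autograd gradient, (1024, 3)
print(out.shape, sdf.shape, normals.shape)
```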
 
 
renderer/ngp_renderer.py DELETED
@@ -1,721 +0,0 @@
1
- import math
2
- import trimesh
3
- import numpy as np
4
-
5
- import torch
6
- import torch.nn as nn
7
- import torch.nn.functional as F
8
- from packaging import version as pver
9
-
10
- import tinycudann as tcnn
11
- from torch.autograd import Function
12
-
13
- from torch.cuda.amp import custom_bwd, custom_fwd
14
-
15
- import raymarching
16
-
17
- def custom_meshgrid(*args):
18
- # ref: https://pytorch.org/docs/stable/generated/torch.meshgrid.html?highlight=meshgrid#torch.meshgrid
19
- if pver.parse(torch.__version__) < pver.parse('1.10'):
20
- return torch.meshgrid(*args)
21
- else:
22
- return torch.meshgrid(*args, indexing='ij')
23
-
24
- def sample_pdf(bins, weights, n_samples, det=False):
25
- # This implementation is from NeRF
26
- # bins: [B, T], old_z_vals
27
- # weights: [B, T - 1], bin weights.
28
- # return: [B, n_samples], new_z_vals
29
-
30
- # Get pdf
31
- weights = weights + 1e-5 # prevent nans
32
- pdf = weights / torch.sum(weights, -1, keepdim=True)
33
- cdf = torch.cumsum(pdf, -1)
34
- cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf], -1)
35
- # Take uniform samples
36
- if det:
37
- u = torch.linspace(0. + 0.5 / n_samples, 1. - 0.5 / n_samples, steps=n_samples).to(weights.device)
38
- u = u.expand(list(cdf.shape[:-1]) + [n_samples])
39
- else:
40
- u = torch.rand(list(cdf.shape[:-1]) + [n_samples]).to(weights.device)
41
-
42
- # Invert CDF
43
- u = u.contiguous()
44
- inds = torch.searchsorted(cdf, u, right=True)
45
- below = torch.max(torch.zeros_like(inds - 1), inds - 1)
46
- above = torch.min((cdf.shape[-1] - 1) * torch.ones_like(inds), inds)
47
- inds_g = torch.stack([below, above], -1) # (B, n_samples, 2)
48
-
49
- matched_shape = [inds_g.shape[0], inds_g.shape[1], cdf.shape[-1]]
50
- cdf_g = torch.gather(cdf.unsqueeze(1).expand(matched_shape), 2, inds_g)
51
- bins_g = torch.gather(bins.unsqueeze(1).expand(matched_shape), 2, inds_g)
52
-
53
- denom = (cdf_g[..., 1] - cdf_g[..., 0])
54
- denom = torch.where(denom < 1e-5, torch.ones_like(denom), denom)
55
- t = (u - cdf_g[..., 0]) / denom
56
- samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])
57
-
58
- return samples
59
-
60
-
61
- def plot_pointcloud(pc, color=None):
62
- # pc: [N, 3]
63
- # color: [N, 3/4]
64
- print('[visualize points]', pc.shape, pc.dtype, pc.min(0), pc.max(0))
65
- pc = trimesh.PointCloud(pc, color)
66
- # axis
67
- axes = trimesh.creation.axis(axis_length=4)
68
- # sphere
69
- sphere = trimesh.creation.icosphere(radius=1)
70
- trimesh.Scene([pc, axes, sphere]).show()
71
-
72
-
73
- class NGPRenderer(nn.Module):
74
- def __init__(self,
75
- bound=1,
76
- cuda_ray=True,
77
-                  density_scale=1, # scale up deltas (or sigmas) to make the density grid sharper; a value larger than 1 usually improves performance.
78
- min_near=0.2,
79
- density_thresh=0.01,
80
- bg_radius=-1,
81
- ):
82
- super().__init__()
83
-
84
- self.bound = bound
85
- self.cascade = 1
86
- self.grid_size = 128
87
- self.density_scale = density_scale
88
- self.min_near = min_near
89
- self.density_thresh = density_thresh
90
- self.bg_radius = bg_radius # radius of the background sphere.
91
-
92
- # prepare aabb with a 6D tensor (xmin, ymin, zmin, xmax, ymax, zmax)
93
- # NOTE: aabb (can be rectangular) is only used to generate points, we still rely on bound (always cubic) to calculate density grid and hashing.
94
- aabb_train = torch.FloatTensor([-bound, -bound, -bound, bound, bound, bound])
95
- aabb_infer = aabb_train.clone()
96
- self.register_buffer('aabb_train', aabb_train)
97
- self.register_buffer('aabb_infer', aabb_infer)
98
-
99
- # extra state for cuda raymarching
100
- self.cuda_ray = cuda_ray
101
- if cuda_ray:
102
- # density grid
103
- density_grid = torch.zeros([self.cascade, self.grid_size ** 3]) # [CAS, H * H * H]
104
- density_bitfield = torch.zeros(self.cascade * self.grid_size ** 3 // 8, dtype=torch.uint8) # [CAS * H * H * H // 8]
105
- self.register_buffer('density_grid', density_grid)
106
- self.register_buffer('density_bitfield', density_bitfield)
107
- self.mean_density = 0
108
- self.iter_density = 0
109
- # step counter
110
- step_counter = torch.zeros(16, 2, dtype=torch.int32) # 16 is hardcoded for averaging...
111
- self.register_buffer('step_counter', step_counter)
112
- self.mean_count = 0
113
- self.local_step = 0
114
-
115
- def forward(self, x, d):
116
- raise NotImplementedError()
117
-
118
- # separated density and color query (can accelerate non-cuda-ray mode.)
119
- def density(self, x):
120
- raise NotImplementedError()
121
-
122
- def color(self, x, d, mask=None, **kwargs):
123
- raise NotImplementedError()
124
-
125
- def reset_extra_state(self):
126
- if not self.cuda_ray:
127
- return
128
- # density grid
129
- self.density_grid.zero_()
130
- self.mean_density = 0
131
- self.iter_density = 0
132
- # step counter
133
- self.step_counter.zero_()
134
- self.mean_count = 0
135
- self.local_step = 0
136
-
137
- def run(self, rays_o, rays_d, num_steps=128, upsample_steps=128, bg_color=None, perturb=False, **kwargs):
138
- # rays_o, rays_d: [B, N, 3], assumes B == 1
139
- # bg_color: [3] in range [0, 1]
140
- # return: image: [B, N, 3], depth: [B, N]
141
-
142
- prefix = rays_o.shape[:-1]
143
- rays_o = rays_o.contiguous().view(-1, 3)
144
- rays_d = rays_d.contiguous().view(-1, 3)
145
-
146
- N = rays_o.shape[0] # N = B * N, in fact
147
- device = rays_o.device
148
-
149
- # choose aabb
150
- aabb = self.aabb_train if self.training else self.aabb_infer
151
-
152
- # sample steps
153
- nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, aabb, self.min_near)
154
- nears.unsqueeze_(-1)
155
- fars.unsqueeze_(-1)
156
-
157
- #print(f'nears = {nears.min().item()} ~ {nears.max().item()}, fars = {fars.min().item()} ~ {fars.max().item()}')
158
-
159
- z_vals = torch.linspace(0.0, 1.0, num_steps, device=device).unsqueeze(0) # [1, T]
160
- z_vals = z_vals.expand((N, num_steps)) # [N, T]
161
- z_vals = nears + (fars - nears) * z_vals # [N, T], in [nears, fars]
162
-
163
- # perturb z_vals
164
- sample_dist = (fars - nears) / num_steps
165
- if perturb:
166
- z_vals = z_vals + (torch.rand(z_vals.shape, device=device) - 0.5) * sample_dist
167
- #z_vals = z_vals.clamp(nears, fars) # avoid out of bounds xyzs.
168
-
169
- # generate xyzs
170
- xyzs = rays_o.unsqueeze(-2) + rays_d.unsqueeze(-2) * z_vals.unsqueeze(-1) # [N, 1, 3] * [N, T, 1] -> [N, T, 3]
171
- xyzs = torch.min(torch.max(xyzs, aabb[:3]), aabb[3:]) # a manual clip.
172
-
173
- #plot_pointcloud(xyzs.reshape(-1, 3).detach().cpu().numpy())
174
-
175
- # query SDF and RGB
176
- density_outputs = self.density(xyzs.reshape(-1, 3))
177
-
178
- #sigmas = density_outputs['sigma'].view(N, num_steps) # [N, T]
179
- for k, v in density_outputs.items():
180
- density_outputs[k] = v.view(N, num_steps, -1)
181
-
182
- # upsample z_vals (nerf-like)
183
- if upsample_steps > 0:
184
- with torch.no_grad():
185
-
186
- deltas = z_vals[..., 1:] - z_vals[..., :-1] # [N, T-1]
187
- deltas = torch.cat([deltas, sample_dist * torch.ones_like(deltas[..., :1])], dim=-1)
188
-
189
- alphas = 1 - torch.exp(-deltas * self.density_scale * density_outputs['sigma'].squeeze(-1)) # [N, T]
190
- alphas_shifted = torch.cat([torch.ones_like(alphas[..., :1]), 1 - alphas + 1e-15], dim=-1) # [N, T+1]
191
- weights = alphas * torch.cumprod(alphas_shifted, dim=-1)[..., :-1] # [N, T]
192
-
193
- # sample new z_vals
194
- z_vals_mid = (z_vals[..., :-1] + 0.5 * deltas[..., :-1]) # [N, T-1]
195
- new_z_vals = sample_pdf(z_vals_mid, weights[:, 1:-1], upsample_steps, det=not self.training).detach() # [N, t]
196
-
197
- new_xyzs = rays_o.unsqueeze(-2) + rays_d.unsqueeze(-2) * new_z_vals.unsqueeze(-1) # [N, 1, 3] * [N, t, 1] -> [N, t, 3]
198
- new_xyzs = torch.min(torch.max(new_xyzs, aabb[:3]), aabb[3:]) # a manual clip.
199
-
200
- # only forward new points to save computation
201
- new_density_outputs = self.density(new_xyzs.reshape(-1, 3))
202
- #new_sigmas = new_density_outputs['sigma'].view(N, upsample_steps) # [N, t]
203
- for k, v in new_density_outputs.items():
204
- new_density_outputs[k] = v.view(N, upsample_steps, -1)
205
-
206
- # re-order
207
- z_vals = torch.cat([z_vals, new_z_vals], dim=1) # [N, T+t]
208
- z_vals, z_index = torch.sort(z_vals, dim=1)
209
-
210
- xyzs = torch.cat([xyzs, new_xyzs], dim=1) # [N, T+t, 3]
211
- xyzs = torch.gather(xyzs, dim=1, index=z_index.unsqueeze(-1).expand_as(xyzs))
212
-
213
- for k in density_outputs:
214
- tmp_output = torch.cat([density_outputs[k], new_density_outputs[k]], dim=1)
215
- density_outputs[k] = torch.gather(tmp_output, dim=1, index=z_index.unsqueeze(-1).expand_as(tmp_output))
216
-
217
- deltas = z_vals[..., 1:] - z_vals[..., :-1] # [N, T+t-1]
218
- deltas = torch.cat([deltas, sample_dist * torch.ones_like(deltas[..., :1])], dim=-1)
219
- alphas = 1 - torch.exp(-deltas * self.density_scale * density_outputs['sigma'].squeeze(-1)) # [N, T+t]
220
- alphas_shifted = torch.cat([torch.ones_like(alphas[..., :1]), 1 - alphas + 1e-15], dim=-1) # [N, T+t+1]
221
- weights = alphas * torch.cumprod(alphas_shifted, dim=-1)[..., :-1] # [N, T+t]
222
-
223
- dirs = rays_d.view(-1, 1, 3).expand_as(xyzs)
224
- for k, v in density_outputs.items():
225
- density_outputs[k] = v.view(-1, v.shape[-1])
226
-
227
- mask = weights > 1e-4 # hard coded
228
- rgbs = self.color(xyzs.reshape(-1, 3), dirs.reshape(-1, 3), mask=mask.reshape(-1), **density_outputs)
229
- rgbs = rgbs.view(N, -1, 3) # [N, T+t, 3]
230
-
231
- #print(xyzs.shape, 'valid_rgb:', mask.sum().item())
232
-
233
- # calculate weight_sum (mask)
234
- weights_sum = weights.sum(dim=-1) # [N]
235
-
236
- # calculate depth
237
- ori_z_vals = ((z_vals - nears) / (fars - nears)).clamp(0, 1)
238
- depth = torch.sum(weights * ori_z_vals, dim=-1)
239
-
240
- # calculate color
241
- image = torch.sum(weights.unsqueeze(-1) * rgbs, dim=-2) # [N, 3], in [0, 1]
242
-
243
- # mix background color
244
- if self.bg_radius > 0:
245
- # use the bg model to calculate bg_color
246
- sph = raymarching.sph_from_ray(rays_o, rays_d, self.bg_radius) # [N, 2] in [-1, 1]
247
- bg_color = self.background(sph, rays_d.reshape(-1, 3)) # [N, 3]
248
- elif bg_color is None:
249
- bg_color = 1
250
-
251
- image = image + (1 - weights_sum).unsqueeze(-1) * bg_color
252
-
253
- image = image.view(*prefix, 3)
254
- depth = depth.view(*prefix)
255
-
256
- # tmp: reg loss in mip-nerf 360
257
- # z_vals_shifted = torch.cat([z_vals[..., 1:], sample_dist * torch.ones_like(z_vals[..., :1])], dim=-1)
258
- # mid_zs = (z_vals + z_vals_shifted) / 2 # [N, T]
259
- # loss_dist = (torch.abs(mid_zs.unsqueeze(1) - mid_zs.unsqueeze(2)) * (weights.unsqueeze(1) * weights.unsqueeze(2))).sum() + 1/3 * ((z_vals_shifted - z_vals_shifted) * (weights ** 2)).sum()
260
-
261
- return {
262
- 'depth': depth,
263
- 'image': image,
264
- 'weights_sum': weights_sum,
265
- }
266
-
267
-
268
- def run_cuda(self, rays_o, rays_d, dt_gamma=0, bg_color=None, perturb=False, force_all_rays=False, max_steps=1024, T_thresh=1e-4, **kwargs):
269
- # rays_o, rays_d: [B, N, 3], assumes B == 1
270
- # return: image: [B, N, 3], depth: [B, N]
271
-
272
- prefix = rays_o.shape[:-1]
273
- rays_o = rays_o.contiguous().view(-1, 3)
274
- rays_d = rays_d.contiguous().view(-1, 3)
275
-
276
- N = rays_o.shape[0] # N = B * N, in fact
277
- device = rays_o.device
278
-
279
- # pre-calculate near far
280
- nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, self.aabb_train if self.training else self.aabb_infer, self.min_near)
281
-
282
- # mix background color
283
- if self.bg_radius > 0:
284
- # use the bg model to calculate bg_color
285
- sph = raymarching.sph_from_ray(rays_o, rays_d, self.bg_radius) # [N, 2] in [-1, 1]
286
- bg_color = self.background(sph, rays_d) # [N, 3]
287
- elif bg_color is None:
288
- bg_color = 1
289
-
290
- results = {}
291
-
292
- if self.training:
293
- # setup counter
294
- counter = self.step_counter[self.local_step % 16]
295
- counter.zero_() # set to 0
296
- self.local_step += 1
297
-
298
- xyzs, dirs, deltas, rays = raymarching.march_rays_train(rays_o, rays_d, self.bound, self.density_bitfield, self.cascade, self.grid_size, nears, fars, counter, self.mean_count, perturb, 128, force_all_rays, dt_gamma, max_steps)
299
-
300
- #plot_pointcloud(xyzs.reshape(-1, 3).detach().cpu().numpy())
301
-
302
- sigmas, rgbs = self(xyzs, dirs)
303
- sigmas = self.density_scale * sigmas
304
-
305
- weights_sum, depth, image = raymarching.composite_rays_train(sigmas, rgbs, deltas, rays, T_thresh)
306
- image = image + (1 - weights_sum).unsqueeze(-1) * bg_color
307
- depth = torch.clamp(depth - nears, min=0) / (fars - nears)
308
- image = image.view(*prefix, 3)
309
- depth = depth.view(*prefix)
310
-
311
- else:
312
-
313
- # allocate outputs
314
- # if use autocast, must init as half so it won't be autocasted and lose reference.
315
- #dtype = torch.half if torch.is_autocast_enabled() else torch.float32
316
- # output should always be float32! only network inference uses half.
317
- dtype = torch.float32
318
-
319
- weights_sum = torch.zeros(N, dtype=dtype, device=device)
320
- depth = torch.zeros(N, dtype=dtype, device=device)
321
- image = torch.zeros(N, 3, dtype=dtype, device=device)
322
-
323
- n_alive = N
324
- rays_alive = torch.arange(n_alive, dtype=torch.int32, device=device) # [N]
325
- rays_t = nears.clone() # [N]
326
-
327
- step = 0
328
-
329
- while step < max_steps:
330
-
331
- # count alive rays
332
- n_alive = rays_alive.shape[0]
333
-
334
- # exit loop
335
- if n_alive <= 0:
336
- break
337
-
338
- # decide compact_steps
339
- n_step = max(min(N // n_alive, 8), 1)
340
-
341
- xyzs, dirs, deltas = raymarching.march_rays(n_alive, n_step, rays_alive, rays_t, rays_o, rays_d, self.bound, self.density_bitfield, self.cascade, self.grid_size, nears, fars, 128, perturb if step == 0 else False, dt_gamma, max_steps)
342
-
343
- sigmas, rgbs = self(xyzs, dirs)
344
- # density_outputs = self.density(xyzs) # [M,], use a dict since it may include extra things, like geo_feat for rgb.
345
- # sigmas = density_outputs['sigma']
346
- # rgbs = self.color(xyzs, dirs, **density_outputs)
347
- sigmas = self.density_scale * sigmas
348
-
349
- raymarching.composite_rays(n_alive, n_step, rays_alive, rays_t, sigmas, rgbs, deltas, weights_sum, depth, image, T_thresh)
350
-
351
- rays_alive = rays_alive[rays_alive >= 0]
352
-
353
- #print(f'step = {step}, n_step = {n_step}, n_alive = {n_alive}, xyzs: {xyzs.shape}')
354
-
355
- step += n_step
356
-
357
- image = image + (1 - weights_sum).unsqueeze(-1) * bg_color
358
- depth = torch.clamp(depth - nears, min=0) / (fars - nears)
359
- image = image.view(*prefix, 3)
360
- depth = depth.view(*prefix)
361
-
362
- results['weights_sum'] = weights_sum
363
- results['depth'] = depth
364
- results['image'] = image
365
-
366
- return results
367
-
368
- @torch.no_grad()
369
- def mark_untrained_grid(self, poses, intrinsic, S=64):
370
- # poses: [B, 4, 4]
371
- # intrinsic: [3, 3]
372
-
373
- if not self.cuda_ray:
374
- return
375
-
376
- if isinstance(poses, np.ndarray):
377
- poses = torch.from_numpy(poses)
378
-
379
- B = poses.shape[0]
380
-
381
- fx, fy, cx, cy = intrinsic
382
-
383
- X = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S)
384
- Y = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S)
385
- Z = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S)
386
-
387
- count = torch.zeros_like(self.density_grid)
388
- poses = poses.to(count.device)
389
-
390
- # 5-level loop, forgive me...
391
-
392
- for xs in X:
393
- for ys in Y:
394
- for zs in Z:
395
-
396
- # construct points
397
- xx, yy, zz = custom_meshgrid(xs, ys, zs)
398
- coords = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # [N, 3], in [0, 128)
399
- indices = raymarching.morton3D(coords).long() # [N]
400
- world_xyzs = (2 * coords.float() / (self.grid_size - 1) - 1).unsqueeze(0) # [1, N, 3] in [-1, 1]
401
-
402
- # cascading
403
- for cas in range(self.cascade):
404
- bound = min(2 ** cas, self.bound)
405
- half_grid_size = bound / self.grid_size
406
- # scale to current cascade's resolution
407
- cas_world_xyzs = world_xyzs * (bound - half_grid_size)
408
-
409
- # split batch to avoid OOM
410
- head = 0
411
- while head < B:
412
- tail = min(head + S, B)
413
-
414
- # world2cam transform (poses is c2w, so we need to transpose it. Another transpose is needed for batched matmul, so the final form is without transpose.)
415
- cam_xyzs = cas_world_xyzs - poses[head:tail, :3, 3].unsqueeze(1)
416
- cam_xyzs = cam_xyzs @ poses[head:tail, :3, :3] # [S, N, 3]
417
-
418
- # query if point is covered by any camera
419
- mask_z = cam_xyzs[:, :, 2] > 0 # [S, N]
420
- mask_x = torch.abs(cam_xyzs[:, :, 0]) < cx / fx * cam_xyzs[:, :, 2] + half_grid_size * 2
421
- mask_y = torch.abs(cam_xyzs[:, :, 1]) < cy / fy * cam_xyzs[:, :, 2] + half_grid_size * 2
422
- mask = (mask_z & mask_x & mask_y).sum(0).reshape(-1) # [N]
423
-
424
- # update count
425
- count[cas, indices] += mask
426
- head += S
427
-
428
- # mark untrained grid as -1
429
- self.density_grid[count == 0] = -1
430
-
431
- print(f'[mark untrained grid] {(count == 0).sum()} from {self.grid_size ** 3 * self.cascade}')
432
-
433
- @torch.no_grad()
434
- def update_extra_state(self, decay=0.95, S=128):
435
- # call before each epoch to update extra states.
436
-
437
- if not self.cuda_ray:
438
- return
439
-
440
- ### update density grid
441
- tmp_grid = - torch.ones_like(self.density_grid)
442
-
443
- # full update.
444
- if self.iter_density < 16:
445
- #if True:
446
- X = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S)
447
- Y = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S)
448
- Z = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S)
449
-
450
- for xs in X:
451
- for ys in Y:
452
- for zs in Z:
453
-
454
- # construct points
455
- xx, yy, zz = custom_meshgrid(xs, ys, zs)
456
- coords = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # [N, 3], in [0, 128)
457
- indices = raymarching.morton3D(coords).long() # [N]
458
- xyzs = 2 * coords.float() / (self.grid_size - 1) - 1 # [N, 3] in [-1, 1]
459
-
460
- # cascading
461
- for cas in range(self.cascade):
462
- bound = min(2 ** cas, self.bound)
463
- half_grid_size = bound / self.grid_size
464
- # scale to current cascade's resolution
465
- cas_xyzs = xyzs * (bound - half_grid_size)
466
- # add noise in [-hgs, hgs]
467
- cas_xyzs += (torch.rand_like(cas_xyzs) * 2 - 1) * half_grid_size
468
- # query density
469
- sigmas = self.density(cas_xyzs)['sigma'].reshape(-1).detach()
470
- sigmas *= self.density_scale
471
- # assign
472
- tmp_grid[cas, indices] = sigmas
473
-
474
- # partial update (half the computation)
475
- # TODO: why no need of maxpool ?
476
- else:
477
- N = self.grid_size ** 3 // 4 # H * H * H / 4
478
- for cas in range(self.cascade):
479
- # random sample some positions
480
- coords = torch.randint(0, self.grid_size, (N, 3), device=self.density_bitfield.device) # [N, 3], in [0, 128)
481
- indices = raymarching.morton3D(coords).long() # [N]
482
- # random sample occupied positions
483
- occ_indices = torch.nonzero(self.density_grid[cas] > 0).squeeze(-1) # [Nz]
484
- rand_mask = torch.randint(0, occ_indices.shape[0], [N], dtype=torch.long, device=self.density_bitfield.device)
485
- occ_indices = occ_indices[rand_mask] # [Nz] --> [N], allow for duplication
486
- occ_coords = raymarching.morton3D_invert(occ_indices) # [N, 3]
487
- # concat
488
- indices = torch.cat([indices, occ_indices], dim=0)
489
- coords = torch.cat([coords, occ_coords], dim=0)
490
- # same below
491
- xyzs = 2 * coords.float() / (self.grid_size - 1) - 1 # [N, 3] in [-1, 1]
492
- bound = min(2 ** cas, self.bound)
493
- half_grid_size = bound / self.grid_size
494
- # scale to current cascade's resolution
495
- cas_xyzs = xyzs * (bound - half_grid_size)
496
- # add noise in [-hgs, hgs]
497
- cas_xyzs += (torch.rand_like(cas_xyzs) * 2 - 1) * half_grid_size
498
- # query density
499
- sigmas = self.density(cas_xyzs)['sigma'].reshape(-1).detach()
500
- sigmas *= self.density_scale
501
- # assign
502
- tmp_grid[cas, indices] = sigmas
503
-
504
- ## max-pool on tmp_grid for less aggressive culling [No significant improvement...]
505
- # invalid_mask = tmp_grid < 0
506
- # tmp_grid = F.max_pool3d(tmp_grid.view(self.cascade, 1, self.grid_size, self.grid_size, self.grid_size), kernel_size=3, stride=1, padding=1).view(self.cascade, -1)
507
- # tmp_grid[invalid_mask] = -1
508
-
509
- # ema update
510
- valid_mask = (self.density_grid >= 0) & (tmp_grid >= 0)
511
- self.density_grid[valid_mask] = torch.maximum(self.density_grid[valid_mask] * decay, tmp_grid[valid_mask])
512
- self.mean_density = torch.mean(self.density_grid.clamp(min=0)).item() # -1 regions are viewed as 0 density.
513
- #self.mean_density = torch.mean(self.density_grid[self.density_grid > 0]).item() # do not count -1 regions
514
- self.iter_density += 1
515
-
516
- # convert to bitfield
517
- density_thresh = min(self.mean_density, self.density_thresh)
518
- self.density_bitfield = raymarching.packbits(self.density_grid, density_thresh, self.density_bitfield)
519
-
520
- ### update step counter
521
- total_step = min(16, self.local_step)
522
- if total_step > 0:
523
- self.mean_count = int(self.step_counter[:total_step, 0].sum().item() / total_step)
524
- self.local_step = 0
525
-
526
- #print(f'[density grid] min={self.density_grid.min().item():.4f}, max={self.density_grid.max().item():.4f}, mean={self.mean_density:.4f}, occ_rate={(self.density_grid > 0.01).sum() / (128**3 * self.cascade):.3f} | [step counter] mean={self.mean_count}')
527
-
528
-
529
- def render(self, rays_o, rays_d, staged=False, max_ray_batch=4096, **kwargs):
530
- # rays_o, rays_d: [B, N, 3], assumes B == 1
531
- # return: pred_rgb: [B, N, 3]
532
-
533
- if self.cuda_ray:
534
- _run = self.run_cuda
535
- else:
536
- _run = self.run
537
-
538
- results = _run(rays_o, rays_d, **kwargs)
539
- return results
540
-
541
-
542
-
543
- class _trunc_exp(Function):
544
- @staticmethod
545
- @custom_fwd(cast_inputs=torch.float32) # cast to float32
546
- def forward(ctx, x):
547
- ctx.save_for_backward(x)
548
- return torch.exp(x)
549
-
550
- @staticmethod
551
- @custom_bwd
552
- def backward(ctx, g):
553
- x = ctx.saved_tensors[0]
554
- return g * torch.exp(x.clamp(-15, 15))
555
-
556
- trunc_exp = _trunc_exp.apply
557
-
558
- class NGPNetwork(NGPRenderer):
559
- def __init__(self,
560
- num_layers=2,
561
- hidden_dim=64,
562
- geo_feat_dim=15,
563
- num_layers_color=3,
564
- hidden_dim_color=64,
565
- bound=0.5,
566
- max_resolution=128,
567
- base_resolution=16,
568
- n_levels=16,
569
- **kwargs
570
- ):
571
- super().__init__(bound, **kwargs)
572
-
573
- # sigma network
574
- self.num_layers = num_layers
575
- self.hidden_dim = hidden_dim
576
- self.geo_feat_dim = geo_feat_dim
577
- self.bound = bound
578
-
579
- log2_hashmap_size = 19
580
- n_features_per_level = 2
581
-
582
-
583
- per_level_scale = np.exp2(np.log2(max_resolution / base_resolution) / (n_levels - 1))
584
-
585
- self.encoder = tcnn.Encoding(
586
- n_input_dims=3,
587
- encoding_config={
588
- "otype": "HashGrid",
589
- "n_levels": n_levels,
590
- "n_features_per_level": n_features_per_level,
591
- "log2_hashmap_size": log2_hashmap_size,
592
- "base_resolution": base_resolution,
593
- "per_level_scale": per_level_scale,
594
- },
595
- )
596
-
597
- self.sigma_net = tcnn.Network(
598
- n_input_dims = n_levels * 2,
599
- n_output_dims=1 + self.geo_feat_dim,
600
- network_config={
601
- "otype": "FullyFusedMLP",
602
- "activation": "ReLU",
603
- "output_activation": "None",
604
- "n_neurons": hidden_dim,
605
- "n_hidden_layers": num_layers - 1,
606
- },
607
- )
608
-
609
- # color network
610
- self.num_layers_color = num_layers_color
611
- self.hidden_dim_color = hidden_dim_color
612
-
613
- self.encoder_dir = tcnn.Encoding(
614
- n_input_dims=3,
615
- encoding_config={
616
- "otype": "SphericalHarmonics",
617
- "degree": 4,
618
- },
619
- )
620
-
621
- self.in_dim_color = self.encoder_dir.n_output_dims + self.geo_feat_dim
622
-
623
- self.color_net = tcnn.Network(
624
- n_input_dims = self.in_dim_color,
625
- n_output_dims=3,
626
- network_config={
627
- "otype": "FullyFusedMLP",
628
- "activation": "ReLU",
629
- "output_activation": "None",
630
- "n_neurons": hidden_dim_color,
631
- "n_hidden_layers": num_layers_color - 1,
632
- },
633
- )
634
- self.density_scale, self.density_std = 10.0, 0.25
635
-
636
- def forward(self, x, d):
637
- # x: [N, 3], in [-bound, bound]
638
- # d: [N, 3], normalized in [-1, 1]
639
-
640
-
641
- # sigma
642
- x_raw = x
643
- x = (x + self.bound) / (2 * self.bound) # to [0, 1]
644
- x = self.encoder(x)
645
- h = self.sigma_net(x)
646
-
647
- # sigma = F.relu(h[..., 0])
648
- density = h[..., 0]
649
- # add density bias
650
- dist = torch.norm(x_raw, dim=-1)
651
- density_bias = (1 - dist / self.density_std) * self.density_scale
652
- density = density_bias + density
653
- sigma = F.softplus(density)
654
- geo_feat = h[..., 1:]
655
-
656
- # color
657
- d = (d + 1) / 2 # tcnn SH encoding requires inputs to be in [0, 1]
658
- d = self.encoder_dir(d)
659
-
660
- # p = torch.zeros_like(geo_feat[..., :1]) # manual input padding
661
- h = torch.cat([d, geo_feat], dim=-1)
662
- h = self.color_net(h)
663
-
664
- # sigmoid activation for rgb
665
- color = torch.sigmoid(h)
666
-
667
- return sigma, color
668
-
669
- def density(self, x):
670
- # x: [N, 3], in [-bound, bound]
671
- x_raw = x
672
- x = (x + self.bound) / (2 * self.bound) # to [0, 1]
673
- x = self.encoder(x)
674
- h = self.sigma_net(x)
675
-
676
- # sigma = F.relu(h[..., 0])
677
- density = h[..., 0]
678
- # add density bias
679
- dist = torch.norm(x_raw, dim=-1)
680
- density_bias = (1 - dist / self.density_std) * self.density_scale
681
- density = density_bias + density
682
- sigma = F.softplus(density)
683
- geo_feat = h[..., 1:]
684
-
685
- return {
686
- 'sigma': sigma,
687
- 'geo_feat': geo_feat,
688
- }
689
-
690
- # allow masked inference
691
- def color(self, x, d, mask=None, geo_feat=None, **kwargs):
692
- # x: [N, 3] in [-bound, bound]
693
- # mask: [N,], bool, indicates where we actually need to compute rgb.
694
-
695
- x = (x + self.bound) / (2 * self.bound) # to [0, 1]
696
-
697
- if mask is not None:
698
- rgbs = torch.zeros(mask.shape[0], 3, dtype=x.dtype, device=x.device) # [N, 3]
699
- # in case of empty mask
700
- if not mask.any():
701
- return rgbs
702
- x = x[mask]
703
- d = d[mask]
704
- geo_feat = geo_feat[mask]
705
-
706
- # color
707
- d = (d + 1) / 2 # tcnn SH encoding requires inputs to be in [0, 1]
708
- d = self.encoder_dir(d)
709
-
710
- h = torch.cat([d, geo_feat], dim=-1)
711
- h = self.color_net(h)
712
-
713
- # sigmoid activation for rgb
714
- h = torch.sigmoid(h)
715
-
716
- if mask is not None:
717
- rgbs[mask] = h.to(rgbs.dtype) # fp16 --> fp32
718
- else:
719
- rgbs = h
720
-
721
- return rgbs
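
A minimal, self-contained sketch of the density-bias trick used in NGPNetwork.forward/density above (PyTorch only; the helper name biased_sigma is hypothetical and not part of the deleted file). Points near the origin receive a positive bias before the softplus, so the density field starts out as a blob centred on the object rather than as empty space:

import torch
import torch.nn.functional as F

def biased_sigma(raw_density, points, density_scale=10.0, density_std=0.25):
    # raw_density: [N] raw network output; points: [N, 3] in [-bound, bound]
    dist = torch.norm(points, dim=-1)                        # distance to the origin
    density_bias = (1 - dist / density_std) * density_scale  # positive near the centre, negative far away
    return F.softplus(raw_density + density_bias)

pts = torch.tensor([[0.0, 0.0, 0.0], [0.4, 0.0, 0.0]])
print(biased_sigma(torch.zeros(2), pts))  # sigma is ~10 at the origin, near 0 at dist 0.4
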
renderer/renderer.py DELETED
@@ -1,604 +0,0 @@
1
- import abc
2
- import os
3
- from pathlib import Path
4
-
5
- import cv2
6
- import numpy as np
7
- import pytorch_lightning as pl
8
- import torch
9
- import torch.nn as nn
10
- import torch.nn.functional as F
11
- from omegaconf import OmegaConf
12
-
13
- from skimage.io import imread, imsave
14
- from PIL import Image
15
- from torch.optim.lr_scheduler import LambdaLR
16
-
17
- from ldm.base_utils import read_pickle, concat_images_list
18
- from renderer.neus_networks import SDFNetwork, RenderingNetwork, SingleVarianceNetwork, SDFHashGridNetwork, RenderingFFNetwork
19
- from renderer.ngp_renderer import NGPNetwork
20
- from ldm.util import instantiate_from_config
21
-
22
- DEFAULT_RADIUS = np.sqrt(3)/2
23
- DEFAULT_SIDE_LENGTH = 0.6
24
-
25
- def sample_pdf(bins, weights, n_samples, det=True):
26
- device = bins.device
27
- dtype = bins.dtype
28
- # This implementation is from NeRF
29
- # Get pdf
30
- weights = weights + 1e-5 # prevent nans
31
- pdf = weights / torch.sum(weights, -1, keepdim=True)
32
- cdf = torch.cumsum(pdf, -1)
33
- cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf], -1)
34
- # Take uniform samples
35
- if det:
36
- u = torch.linspace(0. + 0.5 / n_samples, 1. - 0.5 / n_samples, steps=n_samples, dtype=dtype, device=device)
37
- u = u.expand(list(cdf.shape[:-1]) + [n_samples])
38
- else:
39
- u = torch.rand(list(cdf.shape[:-1]) + [n_samples], dtype=dtype, device=device)
40
-
41
- # Invert CDF
42
- u = u.contiguous()
43
- inds = torch.searchsorted(cdf, u, right=True)
44
- below = torch.max(torch.zeros_like(inds - 1), inds - 1)
45
- above = torch.min((cdf.shape[-1] - 1) * torch.ones_like(inds), inds)
46
- inds_g = torch.stack([below, above], -1) # (batch, N_samples, 2)
47
-
48
- matched_shape = [inds_g.shape[0], inds_g.shape[1], cdf.shape[-1]]
49
- cdf_g = torch.gather(cdf.unsqueeze(1).expand(matched_shape), 2, inds_g)
50
- bins_g = torch.gather(bins.unsqueeze(1).expand(matched_shape), 2, inds_g)
51
-
52
- denom = (cdf_g[..., 1] - cdf_g[..., 0])
53
- denom = torch.where(denom < 1e-5, torch.ones_like(denom), denom)
54
- t = (u - cdf_g[..., 0]) / denom
55
- samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])
56
-
57
- return samples
58
-
59
- def near_far_from_sphere(rays_o, rays_d, radius=DEFAULT_RADIUS):
60
- a = torch.sum(rays_d ** 2, dim=-1, keepdim=True)
61
- b = torch.sum(rays_o * rays_d, dim=-1, keepdim=True)
62
- mid = -b / a
63
- near = mid - radius
64
- far = mid + radius
65
- return near, far
66
-
67
- class BackgroundRemoval:
68
- def __init__(self, device='cuda'):
69
- from carvekit.api.high import HiInterface
70
- self.interface = HiInterface(
71
- object_type="object", # Can be "object" or "hairs-like".
72
- batch_size_seg=5,
73
- batch_size_matting=1,
74
- device=device,
75
- seg_mask_size=640, # Use 640 for Tracer B7 and 320 for U2Net
76
- matting_mask_size=2048,
77
- trimap_prob_threshold=231,
78
- trimap_dilation=30,
79
- trimap_erosion_iters=5,
80
- fp16=True,
81
- )
82
-
83
- @torch.no_grad()
84
- def __call__(self, image):
85
- # image: [H, W, 3] array in [0, 255].
86
- image = Image.fromarray(image)
87
- image = self.interface([image])[0]
88
- image = np.array(image)
89
- return image
90
-
91
-
92
- class BaseRenderer(nn.Module):
93
- def __init__(self, train_batch_num, test_batch_num):
94
- super().__init__()
95
- self.train_batch_num = train_batch_num
96
- self.test_batch_num = test_batch_num
97
-
98
- @abc.abstractmethod
99
- def render_impl(self, ray_batch, is_train, step):
100
- pass
101
-
102
- @abc.abstractmethod
103
- def render_with_loss(self, ray_batch, is_train, step):
104
- pass
105
-
106
- def render(self, ray_batch, is_train, step):
107
- batch_num = self.train_batch_num if is_train else self.test_batch_num
108
- ray_num = ray_batch['rays_o'].shape[0]
109
- outputs = {}
110
- for ri in range(0, ray_num, batch_num):
111
- cur_ray_batch = {}
112
- for k, v in ray_batch.items():
113
- cur_ray_batch[k] = v[ri:ri + batch_num]
114
- cur_outputs = self.render_impl(cur_ray_batch, is_train, step)
115
- for k, v in cur_outputs.items():
116
- if k not in outputs: outputs[k] = []
117
- outputs[k].append(v)
118
-
119
- for k, v in outputs.items():
120
- outputs[k] = torch.cat(v, 0)
121
- return outputs
122
-
123
-
124
- class NeuSRenderer(BaseRenderer):
125
- def __init__(self, train_batch_num, test_batch_num, lambda_eikonal_loss=0.1, use_mask=True,
126
- lambda_rgb_loss=1.0, lambda_mask_loss=0.0, rgb_loss='soft_l1', coarse_sn=64, fine_sn=64):
127
- super().__init__(train_batch_num, test_batch_num)
128
- self.n_samples = coarse_sn
129
- self.n_importance = fine_sn
130
- self.up_sample_steps = 4
131
- self.anneal_end = 200
132
- self.use_mask = use_mask
133
- self.lambda_eikonal_loss = lambda_eikonal_loss
134
- self.lambda_rgb_loss = lambda_rgb_loss
135
- self.lambda_mask_loss = lambda_mask_loss
136
- self.rgb_loss = rgb_loss
137
-
138
- self.sdf_network = SDFNetwork(d_out=257, d_in=3, d_hidden=256, n_layers=8, skip_in=[4], multires=6, bias=0.5, scale=1.0, geometric_init=True, weight_norm=True)
139
- self.color_network = RenderingNetwork(d_feature=256, d_in=9, d_out=3, d_hidden=256, n_layers=4, weight_norm=True, multires_view=4, squeeze_out=True)
140
- self.default_dtype = torch.float32
141
- self.deviation_network = SingleVarianceNetwork(0.3)
142
-
143
- @torch.no_grad()
144
- def get_vertex_colors(self, vertices):
145
- """
146
- @param vertices: n,3
147
- @return:
148
- """
149
- V = vertices.shape[0]
150
- bn = 20480
151
- verts_colors = []
152
- with torch.no_grad():
153
- for vi in range(0, V, bn):
154
- verts = torch.from_numpy(vertices[vi:vi+bn].astype(np.float32)).cuda()
155
- feats = self.sdf_network(verts)[..., 1:]
156
- gradients = self.sdf_network.gradient(verts) # ...,3
157
- gradients = F.normalize(gradients, dim=-1)
158
- colors = self.color_network(verts, gradients, gradients, feats)
159
- colors = torch.clamp(colors,min=0,max=1).cpu().numpy()
160
- verts_colors.append(colors)
161
-
162
- verts_colors = (np.concatenate(verts_colors, 0)*255).astype(np.uint8)
163
- return verts_colors
164
-
165
- def upsample(self, rays_o, rays_d, z_vals, sdf, n_importance, inv_s):
166
- """
167
- Up sampling given a fixed inv_s
168
- """
169
- device = rays_o.device
170
- batch_size, n_samples = z_vals.shape
171
- pts = rays_o[:, None, :] + rays_d[:, None, :] * z_vals[..., :, None] # n_rays, n_samples, 3
172
- inner_mask = self.get_inner_mask(pts)
173
- # radius = torch.linalg.norm(pts, ord=2, dim=-1, keepdim=False)
174
- inside_sphere = inner_mask[:, :-1] | inner_mask[:, 1:]
175
- sdf = sdf.reshape(batch_size, n_samples)
176
- prev_sdf, next_sdf = sdf[:, :-1], sdf[:, 1:]
177
- prev_z_vals, next_z_vals = z_vals[:, :-1], z_vals[:, 1:]
178
- mid_sdf = (prev_sdf + next_sdf) * 0.5
179
- cos_val = (next_sdf - prev_sdf) / (next_z_vals - prev_z_vals + 1e-5)
180
-
181
- prev_cos_val = torch.cat([torch.zeros([batch_size, 1], dtype=self.default_dtype, device=device), cos_val[:, :-1]], dim=-1)
182
- cos_val = torch.stack([prev_cos_val, cos_val], dim=-1)
183
- cos_val, _ = torch.min(cos_val, dim=-1, keepdim=False)
184
- cos_val = cos_val.clip(-1e3, 0.0) * inside_sphere
185
-
186
- dist = (next_z_vals - prev_z_vals)
187
- prev_esti_sdf = mid_sdf - cos_val * dist * 0.5
188
- next_esti_sdf = mid_sdf + cos_val * dist * 0.5
189
- prev_cdf = torch.sigmoid(prev_esti_sdf * inv_s)
190
- next_cdf = torch.sigmoid(next_esti_sdf * inv_s)
191
- alpha = (prev_cdf - next_cdf + 1e-5) / (prev_cdf + 1e-5)
192
- weights = alpha * torch.cumprod(
193
- torch.cat([torch.ones([batch_size, 1], dtype=self.default_dtype, device=device), 1. - alpha + 1e-7], -1), -1)[:, :-1]
194
-
195
- z_samples = sample_pdf(z_vals, weights, n_importance, det=True).detach()
196
- return z_samples
197
-
198
- def cat_z_vals(self, rays_o, rays_d, z_vals, new_z_vals, sdf, last=False):
199
- batch_size, n_samples = z_vals.shape
200
- _, n_importance = new_z_vals.shape
201
- pts = rays_o[:, None, :] + rays_d[:, None, :] * new_z_vals[..., :, None]
202
- z_vals = torch.cat([z_vals, new_z_vals], dim=-1)
203
- z_vals, index = torch.sort(z_vals, dim=-1)
204
-
205
- if not last:
206
- device = pts.device
207
- new_sdf = self.sdf_network.sdf(pts.reshape(-1, 3)).reshape(batch_size, n_importance)
208
- sdf = torch.cat([sdf, new_sdf], dim=-1)
209
- xx = torch.arange(batch_size)[:, None].expand(batch_size, n_samples + n_importance).reshape(-1).to(device)
210
- index = index.reshape(-1)
211
- sdf = sdf[(xx, index)].reshape(batch_size, n_samples + n_importance)
212
-
213
- return z_vals, sdf
214
-
215
- def sample_depth(self, rays_o, rays_d, near, far, perturb):
216
- n_samples = self.n_samples
217
- n_importance = self.n_importance
218
- up_sample_steps = self.up_sample_steps
219
- device = rays_o.device
220
-
221
- # sample points
222
- batch_size = len(rays_o)
223
- z_vals = torch.linspace(0.0, 1.0, n_samples, dtype=self.default_dtype, device=device) # sn
224
- z_vals = near + (far - near) * z_vals[None, :] # rn,sn
225
-
226
- if perturb > 0:
227
- t_rand = (torch.rand([batch_size, 1]).to(device) - 0.5)
228
- z_vals = z_vals + t_rand * 2.0 / n_samples
229
-
230
- # Up sample
231
- with torch.no_grad():
232
- pts = rays_o[:, None, :] + rays_d[:, None, :] * z_vals[..., :, None]
233
- sdf = self.sdf_network.sdf(pts).reshape(batch_size, n_samples)
234
-
235
- for i in range(up_sample_steps):
236
- rn, sn = z_vals.shape
237
- inv_s = torch.ones(rn, sn - 1, dtype=self.default_dtype, device=device) * 64 * 2 ** i
238
- new_z_vals = self.upsample(rays_o, rays_d, z_vals, sdf, n_importance // up_sample_steps, inv_s)
239
- z_vals, sdf = self.cat_z_vals(rays_o, rays_d, z_vals, new_z_vals, sdf, last=(i + 1 == up_sample_steps))
240
-
241
- return z_vals
242
-
243
- def compute_sdf_alpha(self, points, dists, dirs, cos_anneal_ratio, step):
244
- # points [...,3] dists [...] dirs[...,3]
245
- sdf_nn_output = self.sdf_network(points)
246
- sdf = sdf_nn_output[..., 0]
247
- feature_vector = sdf_nn_output[..., 1:]
248
-
249
- gradients = self.sdf_network.gradient(points) # ...,3
250
- inv_s = self.deviation_network(points).clip(1e-6, 1e6) # ...,1
251
- inv_s = inv_s[..., 0]
252
-
253
- true_cos = (dirs * gradients).sum(-1) # [...]
254
- iter_cos = -(F.relu(-true_cos * 0.5 + 0.5) * (1.0 - cos_anneal_ratio) +
255
- F.relu(-true_cos) * cos_anneal_ratio) # always non-positive
256
-
257
- # Estimate signed distances at section points
258
- estimated_next_sdf = sdf + iter_cos * dists * 0.5
259
- estimated_prev_sdf = sdf - iter_cos * dists * 0.5
260
-
261
- prev_cdf = torch.sigmoid(estimated_prev_sdf * inv_s)
262
- next_cdf = torch.sigmoid(estimated_next_sdf * inv_s)
263
-
264
- p = prev_cdf - next_cdf
265
- c = prev_cdf
266
-
267
- alpha = ((p + 1e-5) / (c + 1e-5)).clip(0.0, 1.0) # [...]
268
- return alpha, gradients, feature_vector, inv_s, sdf
269
-
270
- def get_anneal_val(self, step):
271
- if self.anneal_end < 0:
272
- return 1.0
273
- else:
274
- return np.min([1.0, step / self.anneal_end])
275
-
276
- def get_inner_mask(self, points):
277
- return torch.sum(torch.abs(points)<=DEFAULT_SIDE_LENGTH,-1)==3
278
-
279
- def render_impl(self, ray_batch, is_train, step):
280
- near, far = near_far_from_sphere(ray_batch['rays_o'], ray_batch['rays_d'])
281
- rays_o, rays_d = ray_batch['rays_o'], ray_batch['rays_d']
282
- z_vals = self.sample_depth(rays_o, rays_d, near, far, is_train)
283
-
284
- batch_size, n_samples = z_vals.shape
285
-
286
- # section length in original space
287
- dists = z_vals[..., 1:] - z_vals[..., :-1] # rn,sn-1
288
- dists = torch.cat([dists, dists[..., -1:]], -1) # rn,sn
289
- mid_z_vals = z_vals + dists * 0.5
290
-
291
- points = rays_o.unsqueeze(-2) + rays_d.unsqueeze(-2) * mid_z_vals.unsqueeze(-1) # rn, sn, 3
292
- inner_mask = self.get_inner_mask(points)
293
-
294
- dirs = rays_d.unsqueeze(-2).expand(batch_size, n_samples, 3)
295
- dirs = F.normalize(dirs, dim=-1)
296
- device = rays_o.device
297
- alpha, sampled_color, gradient_error, normal = torch.zeros(batch_size, n_samples, dtype=self.default_dtype, device=device), \
298
- torch.zeros(batch_size, n_samples, 3, dtype=self.default_dtype, device=device), \
299
- torch.zeros([batch_size, n_samples], dtype=self.default_dtype, device=device), \
300
- torch.zeros([batch_size, n_samples, 3], dtype=self.default_dtype, device=device)
301
- if torch.sum(inner_mask) > 0:
302
- cos_anneal_ratio = self.get_anneal_val(step) if is_train else 1.0
303
- alpha[inner_mask], gradients, feature_vector, inv_s, sdf = self.compute_sdf_alpha(points[inner_mask], dists[inner_mask], dirs[inner_mask], cos_anneal_ratio, step)
304
- sampled_color[inner_mask] = self.color_network(points[inner_mask], gradients, -dirs[inner_mask], feature_vector)
305
- # Eikonal loss
306
- gradient_error[inner_mask] = (torch.linalg.norm(gradients, ord=2, dim=-1) - 1.0) ** 2 # rn,sn
307
- normal[inner_mask] = F.normalize(gradients, dim=-1)
308
-
309
- weights = alpha * torch.cumprod(torch.cat([torch.ones([batch_size, 1], dtype=self.default_dtype, device=device), 1. - alpha + 1e-7], -1), -1)[..., :-1] # rn,sn
310
- mask = torch.sum(weights,dim=1).unsqueeze(-1) # rn,1
311
- color = (sampled_color * weights[..., None]).sum(dim=1) + (1 - mask) # add white background
312
- normal = (normal * weights[..., None]).sum(dim=1)
313
-
314
- outputs = {
315
- 'rgb': color, # rn,3
316
- 'gradient_error': gradient_error, # rn,sn
317
- 'inner_mask': inner_mask, # rn,sn
318
- 'normal': normal, # rn,3
319
- 'mask': mask, # rn,1
320
- }
321
- return outputs
322
-
323
- def render_with_loss(self, ray_batch, is_train, step):
324
- render_outputs = self.render(ray_batch, is_train, step)
325
-
326
- rgb_gt = ray_batch['rgb']
327
- rgb_pr = render_outputs['rgb']
328
- if self.rgb_loss == 'soft_l1':
329
- epsilon = 0.001
330
- rgb_loss = torch.sqrt(torch.sum((rgb_gt - rgb_pr) ** 2, dim=-1) + epsilon)
331
- elif self.rgb_loss =='mse':
332
- rgb_loss = F.mse_loss(rgb_pr, rgb_gt, reduction='none')
333
- else:
334
- raise NotImplementedError
335
- rgb_loss = torch.mean(rgb_loss)
336
-
337
- eikonal_loss = torch.sum(render_outputs['gradient_error'] * render_outputs['inner_mask']) / torch.sum(render_outputs['inner_mask'] + 1e-5)
338
- loss = rgb_loss * self.lambda_rgb_loss + eikonal_loss * self.lambda_eikonal_loss
339
- loss_batch = {
340
- 'eikonal': eikonal_loss,
341
- 'rendering': rgb_loss,
342
- # 'mask': mask_loss,
343
- }
344
- if self.lambda_mask_loss>0 and self.use_mask:
345
- mask_loss = F.mse_loss(render_outputs['mask'], ray_batch['mask'], reduction='none').mean()
346
- loss += mask_loss * self.lambda_mask_loss
347
- loss_batch['mask'] = mask_loss
348
- return loss, loss_batch
349
-
350
-
351
- class NeRFRenderer(BaseRenderer):
352
- def __init__(self, train_batch_num, test_batch_num, bound=0.5, use_mask=False, lambda_rgb_loss=1.0, lambda_mask_loss=0.0):
353
- super().__init__(train_batch_num, test_batch_num)
354
- self.train_batch_num = train_batch_num
355
- self.test_batch_num = test_batch_num
356
- self.use_mask = use_mask
357
- self.field = NGPNetwork(bound=bound)
358
-
359
- self.update_interval = 16
360
- self.fp16 = True
361
- self.lambda_rgb_loss = lambda_rgb_loss
362
- self.lambda_mask_loss = lambda_mask_loss
363
-
364
- def render_impl(self, ray_batch, is_train, step):
365
- rays_o, rays_d = ray_batch['rays_o'], ray_batch['rays_d']
366
- with torch.cuda.amp.autocast(enabled=self.fp16):
367
- if step % self.update_interval==0:
368
- self.field.update_extra_state()
369
-
370
- outputs = self.field.render(rays_o, rays_d,)
371
-
372
- renderings={
373
- 'rgb': outputs['image'],
374
- 'depth': outputs['depth'],
375
- 'mask': outputs['weights_sum'].unsqueeze(-1),
376
- }
377
- return renderings
378
-
379
- def render_with_loss(self, ray_batch, is_train, step):
380
- render_outputs = self.render(ray_batch, is_train, step)
381
-
382
- rgb_gt = ray_batch['rgb']
383
- rgb_pr = render_outputs['rgb']
384
- epsilon = 0.001
385
- rgb_loss = torch.sqrt(torch.sum((rgb_gt - rgb_pr) ** 2, dim=-1) + epsilon)
386
- rgb_loss = torch.mean(rgb_loss)
387
- loss = rgb_loss * self.lambda_rgb_loss
388
- loss_batch = {'rendering': rgb_loss}
389
-
390
- if self.use_mask:
391
- mask_loss = F.mse_loss(render_outputs['mask'], ray_batch['mask'], reduction='none')
392
- mask_loss = torch.mean(mask_loss)
393
- loss = loss + mask_loss * self.lambda_mask_loss
394
- loss_batch['mask'] = mask_loss
395
- return loss, loss_batch
396
-
397
-
398
- class RendererTrainer(pl.LightningModule):
399
- def __init__(self, image_path, total_steps, warm_up_steps, log_dir, train_batch_fg_num=0,
400
- use_cube_feats=False, cube_ckpt=None, cube_cfg=None, cube_bound=0.5,
401
- train_batch_num=4096, test_batch_num=8192, use_warm_up=True, use_mask=True,
402
- lambda_rgb_loss=1.0, lambda_mask_loss=0.0, renderer='neus',
403
- # used in neus
404
- lambda_eikonal_loss=0.1,
405
- coarse_sn=64, fine_sn=64):
406
- super().__init__()
407
- self.num_images = 16
408
- self.image_size = 256
409
- self.log_dir = log_dir
410
- (Path(log_dir)/'images').mkdir(exist_ok=True, parents=True)
411
- self.train_batch_num = train_batch_num
412
- self.train_batch_fg_num = train_batch_fg_num
413
- self.test_batch_num = test_batch_num
414
- self.image_path = image_path
415
- self.total_steps = total_steps
416
- self.warm_up_steps = warm_up_steps
417
- self.use_mask = use_mask
418
- self.lambda_eikonal_loss = lambda_eikonal_loss
419
- self.lambda_rgb_loss = lambda_rgb_loss
420
- self.lambda_mask_loss = lambda_mask_loss
421
- self.use_warm_up = use_warm_up
422
-
423
- self.use_cube_feats, self.cube_cfg, self.cube_ckpt = use_cube_feats, cube_cfg, cube_ckpt
424
-
425
- self._init_dataset()
426
- if renderer=='neus':
427
- self.renderer = NeuSRenderer(train_batch_num, test_batch_num,
428
- lambda_rgb_loss=lambda_rgb_loss,
429
- lambda_eikonal_loss=lambda_eikonal_loss,
430
- lambda_mask_loss=lambda_mask_loss,
431
- coarse_sn=coarse_sn, fine_sn=fine_sn)
432
- elif renderer=='ngp':
433
- self.renderer = NeRFRenderer(train_batch_num, test_batch_num, bound=cube_bound, use_mask=use_mask, lambda_mask_loss=lambda_mask_loss, lambda_rgb_loss=lambda_rgb_loss,)
434
- else:
435
- raise NotImplementedError
436
- self.validation_index = 0
437
-
438
- def _construct_ray_batch(self, images_info):
439
- image_num = images_info['images'].shape[0]
440
- _, h, w, _ = images_info['images'].shape
441
- coords = torch.stack(torch.meshgrid(torch.arange(h), torch.arange(w)), -1)[:, :, (1, 0)] # h,w,2
442
- coords = coords.float()[None, :, :, :].repeat(image_num, 1, 1, 1) # imn,h,w,2
443
- coords = coords.reshape(image_num, h * w, 2)
444
- coords = torch.cat([coords, torch.ones(image_num, h * w, 1, dtype=torch.float32)], 2) # imn,h*w,3
445
-
446
- # imn,h*w,3 @ imn,3,3 => imn,h*w,3
447
- rays_d = coords @ torch.inverse(images_info['Ks']).permute(0, 2, 1)
448
- poses = images_info['poses'] # imn,3,4
449
- R, t = poses[:, :, :3], poses[:, :, 3:]
450
- rays_d = rays_d @ R
451
- rays_d = F.normalize(rays_d, dim=-1)
452
- rays_o = -R.permute(0,2,1) @ t # imn,3,3 @ imn,3,1
453
- rays_o = rays_o.permute(0, 2, 1).repeat(1, h*w, 1) # imn,h*w,3
454
-
455
- ray_batch = {
456
- 'rgb': images_info['images'].reshape(image_num*h*w,3),
457
- 'mask': images_info['masks'].reshape(image_num*h*w,1),
458
- 'rays_o': rays_o.reshape(image_num*h*w,3).float(),
459
- 'rays_d': rays_d.reshape(image_num*h*w,3).float(),
460
- }
461
- return ray_batch
462
-
463
- @staticmethod
464
- def load_model(cfg, ckpt):
465
- config = OmegaConf.load(cfg)
466
- model = instantiate_from_config(config.model)
467
- print(f'loading model from {ckpt} ...')
468
- ckpt = torch.load(ckpt)
469
- model.load_state_dict(ckpt['state_dict'])
470
- model = model.cuda().eval()
471
- return model
472
-
473
- def _init_dataset(self):
474
- mask_predictor = BackgroundRemoval()
475
- self.K, self.azs, self.els, self.dists, self.poses = read_pickle(f'meta_info/camera-{self.num_images}.pkl')
476
-
477
- self.images_info = {'images': [] ,'masks': [], 'Ks': [], 'poses':[]}
478
-
479
- img = imread(self.image_path)
480
-
481
- for index in range(self.num_images):
482
- rgb = np.copy(img[:,index*self.image_size:(index+1)*self.image_size,:])
483
- # predict mask
484
- if self.use_mask:
485
- imsave(f'{self.log_dir}/input-{index}.png', rgb)
486
- masked_image = mask_predictor(rgb)
487
- imsave(f'{self.log_dir}/masked-{index}.png', masked_image)
488
- mask = masked_image[:,:,3].astype(np.float32)/255
489
- else:
490
- h, w, _ = rgb.shape
491
- mask = np.zeros([h,w], np.float32)
492
-
493
- rgb = rgb.astype(np.float32)/255
494
- K, pose = np.copy(self.K), self.poses[index]
495
- self.images_info['images'].append(torch.from_numpy(rgb.astype(np.float32))) # h,w,3
496
- self.images_info['masks'].append(torch.from_numpy(mask.astype(np.float32))) # h,w
497
- self.images_info['Ks'].append(torch.from_numpy(K.astype(np.float32)))
498
- self.images_info['poses'].append(torch.from_numpy(pose.astype(np.float32)))
499
-
500
- for k, v in self.images_info.items(): self.images_info[k] = torch.stack(v, 0) # stack all values
501
-
502
- self.train_batch = self._construct_ray_batch(self.images_info)
503
- self.train_batch_pseudo_fg = {}
504
- pseudo_fg_mask = torch.sum(self.train_batch['rgb']>0.99,1)!=3
505
- for k, v in self.train_batch.items():
506
- self.train_batch_pseudo_fg[k] = v[pseudo_fg_mask]
507
- self.train_ray_fg_num = int(torch.sum(pseudo_fg_mask).cpu().numpy())
508
- self.train_ray_num = self.num_images * self.image_size ** 2
509
- self._shuffle_train_batch()
510
- self._shuffle_train_fg_batch()
511
-
512
- def _shuffle_train_batch(self):
513
- self.train_batch_i = 0
514
- shuffle_idxs = torch.randperm(self.train_ray_num, device='cpu') # shuffle
515
- for k, v in self.train_batch.items():
516
- self.train_batch[k] = v[shuffle_idxs]
517
-
518
- def _shuffle_train_fg_batch(self):
519
- self.train_batch_fg_i = 0
520
- shuffle_idxs = torch.randperm(self.train_ray_fg_num, device='cpu') # shuffle
521
- for k, v in self.train_batch_pseudo_fg.items():
522
- self.train_batch_pseudo_fg[k] = v[shuffle_idxs]
523
-
524
-
525
- def training_step(self, batch, batch_idx):
526
- train_ray_batch = {k: v[self.train_batch_i:self.train_batch_i + self.train_batch_num].cuda() for k, v in self.train_batch.items()}
527
- self.train_batch_i += self.train_batch_num
528
- if self.train_batch_i + self.train_batch_num >= self.train_ray_num: self._shuffle_train_batch()
529
-
530
- if self.train_batch_fg_num>0:
531
- train_ray_batch_fg = {k: v[self.train_batch_fg_i:self.train_batch_fg_i+self.train_batch_fg_num].cuda() for k, v in self.train_batch_pseudo_fg.items()}
532
- self.train_batch_fg_i += self.train_batch_fg_num
533
- if self.train_batch_fg_i + self.train_batch_fg_num >= self.train_ray_fg_num: self._shuffle_train_fg_batch()
534
- for k, v in train_ray_batch_fg.items():
535
- train_ray_batch[k] = torch.cat([train_ray_batch[k], v], 0)
536
-
537
- loss, loss_batch = self.renderer.render_with_loss(train_ray_batch, is_train=True, step=self.global_step)
538
- self.log_dict(loss_batch, prog_bar=True, logger=True, on_step=True, on_epoch=False, rank_zero_only=True)
539
-
540
- self.log('step', self.global_step, prog_bar=True, on_step=True, on_epoch=False, logger=False, rank_zero_only=True)
541
- lr = self.optimizers().param_groups[0]['lr']
542
- self.log('lr', lr, prog_bar=True, logger=True, on_step=True, on_epoch=False, rank_zero_only=True)
543
- return loss
544
-
545
- def _slice_images_info(self, index):
546
- return {k:v[index:index+1] for k, v in self.images_info.items()}
547
-
548
- @torch.no_grad()
549
- def validation_step(self, batch, batch_idx):
550
- with torch.no_grad():
551
- if self.global_rank==0:
552
- # we output a rendered image
553
- images_info = self._slice_images_info(self.validation_index)
554
- self.validation_index += 1
555
- self.validation_index %= self.num_images
556
-
557
- test_ray_batch = self._construct_ray_batch(images_info)
558
- test_ray_batch = {k: v.cuda() for k,v in test_ray_batch.items()}
559
- test_ray_batch['near'], test_ray_batch['far'] = near_far_from_sphere(test_ray_batch['rays_o'], test_ray_batch['rays_d'])
560
- render_outputs = self.renderer.render(test_ray_batch, False, self.global_step)
561
-
562
- process = lambda x: (x.cpu().numpy() * 255).astype(np.uint8)
563
- h, w = self.image_size, self.image_size
564
- rgb = torch.clamp(render_outputs['rgb'].reshape(h, w, 3), max=1.0, min=0.0)
565
- mask = torch.clamp(render_outputs['mask'].reshape(h, w, 1), max=1.0, min=0.0)
566
- mask_ = torch.repeat_interleave(mask, 3, dim=-1)
567
- output_image = concat_images_list(process(rgb), process(mask_))
568
- if 'normal' in render_outputs:
569
- normal = torch.clamp((render_outputs['normal'].reshape(h, w, 3) + 1) / 2, max=1.0, min=0.0)
570
- normal = normal * mask # we only show the foreground normal
571
- output_image = concat_images_list(output_image, process(normal))
572
-
573
- # save images
574
- imsave(f'{self.log_dir}/images/{self.global_step}.jpg', output_image)
575
-
576
- def configure_optimizers(self):
577
- lr = self.learning_rate
578
- opt = torch.optim.AdamW([{"params": self.renderer.parameters(), "lr": lr},], lr=lr)
579
-
580
- def schedule_fn(step):
581
- total_step = self.total_steps
582
- warm_up_step = self.warm_up_steps
583
- warm_up_init = 0.02
584
- warm_up_end = 1.0
585
- final_lr = 0.02
586
- interval = 1000
587
- times = total_step // interval
588
- ratio = np.power(final_lr, 1/times)
589
- if step<warm_up_step:
590
- learning_rate = (step / warm_up_step) * (warm_up_end - warm_up_init) + warm_up_init
591
- else:
592
- learning_rate = ratio ** (step // interval) * warm_up_end
593
- return learning_rate
594
-
595
- if self.use_warm_up:
596
- scheduler = [{
597
- 'scheduler': LambdaLR(opt, lr_lambda=schedule_fn),
598
- 'interval': 'step',
599
- 'frequency': 1
600
- }]
601
- else:
602
- scheduler = []
603
- return [opt], scheduler
604
-
requirements.txt CHANGED
@@ -19,5 +19,4 @@ trimesh
19
  easydict
20
  nerfacc
21
  imageio-ffmpeg==0.4.7
22
- git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
23
  git+https://github.com/openai/CLIP.git
 
19
  easydict
20
  nerfacc
21
  imageio-ffmpeg==0.4.7
 
22
  git+https://github.com/openai/CLIP.git
train_renderer.py DELETED
@@ -1,187 +0,0 @@
1
- import argparse
2
-
3
- import imageio
4
- import numpy as np
5
- import torch
6
- import torch.nn.functional as F
7
- from pathlib import Path
8
-
9
- import trimesh
10
- from omegaconf import OmegaConf
11
- from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor, Callback
12
- from pytorch_lightning.loggers import TensorBoardLogger
13
- from pytorch_lightning import Trainer
14
- from skimage.io import imsave
15
- from tqdm import tqdm
16
-
17
- import mcubes
18
-
19
- from ldm.base_utils import read_pickle, output_points
20
- from renderer.renderer import NeuSRenderer, DEFAULT_SIDE_LENGTH
21
- from ldm.util import instantiate_from_config
22
-
23
- class ResumeCallBacks(Callback):
24
- def __init__(self):
25
- pass
26
-
27
- def on_train_start(self, trainer, pl_module):
28
- pl_module.optimizers().param_groups = pl_module.optimizers()._optimizer.param_groups
29
-
30
- def render_images(model, output,):
31
- # render from model
32
- n = 180
33
- azimuths = (np.arange(n) / n * np.pi * 2).astype(np.float32)
34
- elevations = np.deg2rad(np.asarray([30] * n).astype(np.float32))
35
- K, _, _, _, poses = read_pickle(f'meta_info/camera-16.pkl')
36
- output_points
37
- h, w = 256, 256
38
- default_size = 256
39
- K = np.diag([w/default_size,h/default_size,1.0]) @ K
40
- imgs = []
41
- for ni in tqdm(range(n)):
42
- # R = euler2mat(azimuths[ni], elevations[ni], 0, 'szyx')
43
- # R = np.asarray([[0,-1,0],[0,0,-1],[1,0,0]]) @ R
44
- e, a = elevations[ni], azimuths[ni]
45
- row1 = np.asarray([np.sin(e)*np.cos(a),np.sin(e)*np.sin(a),-np.cos(e)])
46
- row0 = np.asarray([-np.sin(a),np.cos(a), 0])
47
- row2 = np.cross(row0, row1)
48
- R = np.stack([row0,row1,row2],0)
49
- t = np.asarray([0,0,1.5])
50
- pose = np.concatenate([R,t[:,None]],1)
51
- pose_ = torch.from_numpy(pose.astype(np.float32)).unsqueeze(0)
52
- K_ = torch.from_numpy(K.astype(np.float32)).unsqueeze(0) # [1,3,3]
53
-
54
- coords = torch.stack(torch.meshgrid(torch.arange(h), torch.arange(w)), -1)[:, :, (1, 0)] # h,w,2
55
- coords = coords.float()[None, :, :, :].repeat(1, 1, 1, 1) # imn,h,w,2
56
- coords = coords.reshape(1, h * w, 2)
57
- coords = torch.cat([coords, torch.ones(1, h * w, 1, dtype=torch.float32)], 2) # imn,h*w,3
58
-
59
- # imn,h*w,3 @ imn,3,3 => imn,h*w,3
60
- rays_d = coords @ torch.inverse(K_).permute(0, 2, 1)
61
- R, t = pose_[:, :, :3], pose_[:, :, 3:]
62
- rays_d = rays_d @ R
63
- rays_d = F.normalize(rays_d, dim=-1)
64
- rays_o = -R.permute(0, 2, 1) @ t # imn,3,3 @ imn,3,1
65
- rays_o = rays_o.permute(0, 2, 1).repeat(1, h * w, 1) # imn,h*w,3
66
-
67
- ray_batch = {
68
- 'rays_o': rays_o.reshape(-1,3).cuda(),
69
- 'rays_d': rays_d.reshape(-1,3).cuda(),
70
- }
71
- with torch.no_grad():
72
- image = model.renderer.render(ray_batch,False,5000)['rgb'].reshape(h,w,3)
73
- image = (image.cpu().numpy() * 255).astype(np.uint8)
74
- imgs.append(image)
75
-
76
- imageio.mimsave(f'{output}/rendering.mp4', imgs, fps=30)
77
-
78
- def extract_fields(bound_min, bound_max, resolution, query_func, batch_size=64, outside_val=1.0):
79
- N = batch_size
80
- X = torch.linspace(bound_min[0], bound_max[0], resolution).split(N)
81
- Y = torch.linspace(bound_min[1], bound_max[1], resolution).split(N)
82
- Z = torch.linspace(bound_min[2], bound_max[2], resolution).split(N)
83
-
84
- u = np.zeros([resolution, resolution, resolution], dtype=np.float32)
85
- with torch.no_grad():
86
- for xi, xs in enumerate(X):
87
- for yi, ys in enumerate(Y):
88
- for zi, zs in enumerate(Z):
89
- xx, yy, zz = torch.meshgrid(xs, ys, zs)
90
- pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1).cuda()
91
- val = query_func(pts).detach()
92
- outside_mask = torch.norm(pts,dim=-1)>=1.0
93
- val[outside_mask]=outside_val
94
- val = val.reshape(len(xs), len(ys), len(zs)).cpu().numpy()
95
- u[xi * N: xi * N + len(xs), yi * N: yi * N + len(ys), zi * N: zi * N + len(zs)] = val
96
- return u
97
-
98
- def extract_geometry(bound_min, bound_max, resolution, threshold, query_func, color_func, outside_val=1.0):
99
- u = extract_fields(bound_min, bound_max, resolution, query_func, outside_val=outside_val)
100
- vertices, triangles = mcubes.marching_cubes(u, threshold)
101
- b_max_np = bound_max.detach().cpu().numpy()
102
- b_min_np = bound_min.detach().cpu().numpy()
103
-
104
- vertices = vertices / (resolution - 1.0) * (b_max_np - b_min_np)[None, :] + b_min_np[None, :]
105
- vertex_colors = color_func(vertices)
106
- return vertices, triangles, vertex_colors
107
-
108
- def extract_mesh(model, output, resolution=512):
109
- if not isinstance(model.renderer, NeuSRenderer): return
110
- bbox_min = -torch.ones(3)*DEFAULT_SIDE_LENGTH
111
- bbox_max = torch.ones(3)*DEFAULT_SIDE_LENGTH
112
- with torch.no_grad():
113
- vertices, triangles, vertex_colors = extract_geometry(bbox_min, bbox_max, resolution, 0, lambda x: model.renderer.sdf_network.sdf(x), lambda x: model.renderer.get_vertex_colors(x))
114
-
115
- # output geometry
116
- mesh = trimesh.Trimesh(vertices, triangles, vertex_colors=vertex_colors)
117
- mesh.export(str(f'{output}/mesh.ply'))
118
-
119
- def main():
120
- parser = argparse.ArgumentParser()
121
- parser.add_argument('-i', '--image_path', type=str, required=True)
122
- parser.add_argument('-n', '--name', type=str, required=True)
123
- parser.add_argument('-b', '--base', type=str, default='configs/neus.yaml')
124
- parser.add_argument('-l', '--log', type=str, default='output/renderer')
125
- parser.add_argument('-s', '--seed', type=int, default=6033)
126
- parser.add_argument('-g', '--gpus', type=str, default='0,')
127
- parser.add_argument('-r', '--resume', action='store_true', default=False, dest='resume')
128
- parser.add_argument('--fp16', action='store_true', default=False, dest='fp16')
129
- opt = parser.parse_args()
130
- # seed_everything(opt.seed)
131
-
132
- # configs
133
- cfg = OmegaConf.load(opt.base)
134
- name = opt.name
135
- log_dir, ckpt_dir = Path(opt.log) / name, Path(opt.log) / name / 'ckpt'
136
- cfg.model.params['image_path'] = opt.image_path
137
- cfg.model.params['log_dir'] = log_dir
138
-
139
- # setup
140
- log_dir.mkdir(exist_ok=True, parents=True)
141
- ckpt_dir.mkdir(exist_ok=True, parents=True)
142
- trainer_config = cfg.trainer
143
- callback_config = cfg.callbacks
144
- model_config = cfg.model
145
- data_config = cfg.data
146
-
147
- data_config.params.seed = opt.seed
148
- data = instantiate_from_config(data_config)
149
- data.prepare_data()
150
- data.setup('fit')
151
-
152
- model = instantiate_from_config(model_config,)
153
- model.cpu()
154
- model.learning_rate = model_config.base_lr
155
-
156
- # logger
157
- logger = TensorBoardLogger(save_dir=log_dir, name='tensorboard_logs')
158
- callbacks=[]
159
- callbacks.append(LearningRateMonitor(logging_interval='step'))
160
- callbacks.append(ModelCheckpoint(dirpath=ckpt_dir, filename="{epoch:06}", verbose=True, save_last=True, every_n_train_steps=callback_config.save_interval))
161
-
162
- # trainer
163
- trainer_config.update({
164
- "accelerator": "cuda", "check_val_every_n_epoch": None,
165
- "benchmark": True, "num_sanity_val_steps": 0,
166
- "devices": 1, "gpus": opt.gpus,
167
- })
168
- if opt.fp16:
169
- trainer_config['precision']=16
170
-
171
- if opt.resume:
172
- callbacks.append(ResumeCallBacks())
173
- trainer_config['resume_from_checkpoint'] = str(ckpt_dir / 'last.ckpt')
174
- else:
175
- if (ckpt_dir / 'last.ckpt').exists():
176
- raise RuntimeError(f"checkpoint {ckpt_dir / 'last.ckpt'} existing ...")
177
- trainer = Trainer.from_argparse_args(args=argparse.Namespace(), **trainer_config, logger=logger, callbacks=callbacks)
178
-
179
- trainer.fit(model, data)
180
-
181
- model = model.cuda().eval()
182
-
183
- render_images(model, log_dir)
184
- extract_mesh(model, log_dir)
185
-
186
- if __name__=="__main__":
187
- main()
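
A short sketch of the camera rotation built inside render_images above from an elevation e and azimuth a (numpy only; the helper name pose_rotation is hypothetical). The three rows form an orthonormal world-to-camera rotation, which the assertion below checks:

import numpy as np

def pose_rotation(elevation, azimuth):
    e, a = elevation, azimuth
    row1 = np.array([np.sin(e) * np.cos(a), np.sin(e) * np.sin(a), -np.cos(e)])
    row0 = np.array([-np.sin(a), np.cos(a), 0.0])
    row2 = np.cross(row0, row1)              # completes a right-handed basis
    return np.stack([row0, row1, row2], 0)   # [3, 3] world-to-camera rotation

R = pose_rotation(np.deg2rad(30), np.deg2rad(45))
assert np.allclose(R @ R.T, np.eye(3))       # rows are orthonormal
print(R)
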
train_syncdreamer.py DELETED
@@ -1,307 +0,0 @@
1
- import argparse, os, sys
2
- import numpy as np
3
- import time
4
- import torch
5
- import torch.nn as nn
6
- import torchvision
7
- import pytorch_lightning as pl
8
-
9
- from omegaconf import OmegaConf
10
- from PIL import Image
11
-
12
- from pytorch_lightning import seed_everything
13
- from pytorch_lightning.strategies import DDPStrategy
14
- from pytorch_lightning.trainer import Trainer
15
- from pytorch_lightning.callbacks import ModelCheckpoint, Callback, LearningRateMonitor
16
- from pytorch_lightning.utilities import rank_zero_info, rank_zero_only
17
-
18
- from ldm.util import instantiate_from_config
19
-
20
-
21
- @rank_zero_only
22
- def rank_zero_print(*args):
23
- print(*args)
24
-
25
- def get_parser(**parser_kwargs):
26
- def str2bool(v):
27
- if isinstance(v, bool):
28
- return v
29
- if v.lower() in ("yes", "true", "t", "y", "1"):
30
- return True
31
- elif v.lower() in ("no", "false", "f", "n", "0"):
32
- return False
33
- else:
34
- raise argparse.ArgumentTypeError("Boolean value expected.")
35
-
36
- parser = argparse.ArgumentParser(**parser_kwargs)
37
- parser.add_argument("-r", "--resume", dest='resume', action='store_true', default=False)
38
- parser.add_argument("-b", "--base", type=str, default='configs/syncdreamer-training.yaml',)
39
- parser.add_argument("-l", "--logdir", type=str, default="ckpt/logs", help="directory for logging data", )
40
- parser.add_argument("-c", "--ckptdir", type=str, default="ckpt/models", help="directory for checkpoint data", )
41
- parser.add_argument("-s", "--seed", type=int, default=6033, help="seed for seed_everything", )
42
- parser.add_argument("--finetune_from", type=str, default="/cfs-cq-dcc/rondyliu/models/sd-image-conditioned-v2.ckpt", help="path to checkpoint to load model state from" )
43
- parser.add_argument("--gpus", type=str, default='0,')
44
- return parser
45
-
46
- def trainer_args(opt):
47
- parser = argparse.ArgumentParser()
48
- parser = Trainer.add_argparse_args(parser)
49
- args = parser.parse_args([])
50
- return sorted(k for k in vars(args) if hasattr(opt, k))
51
-
52
- class SetupCallback(Callback):
53
- def __init__(self, resume, logdir, ckptdir, cfgdir, config):
54
- super().__init__()
55
- self.resume = resume
56
- self.logdir = logdir
57
- self.ckptdir = ckptdir
58
- self.cfgdir = cfgdir
59
- self.config = config
60
-
61
- def on_fit_start(self, trainer, pl_module):
62
- if trainer.global_rank == 0:
63
- # Create logdirs and save configs
64
- os.makedirs(self.logdir, exist_ok=True)
65
- os.makedirs(self.ckptdir, exist_ok=True)
66
- os.makedirs(self.cfgdir, exist_ok=True)
67
-
68
- rank_zero_print(OmegaConf.to_yaml(self.config))
69
- OmegaConf.save(self.config, os.path.join(self.cfgdir, "configs.yaml"))
70
-
71
- if not self.resume and os.path.exists(os.path.join(self.logdir,'checkpoints','last.ckpt')):
72
- raise RuntimeError(f"checkpoint {os.path.join(self.logdir,'checkpoints','last.ckpt')} existing")
73
-
74
- class ImageLogger(Callback):
75
- def __init__(self, batch_frequency, max_images, log_images_kwargs=None):
76
- super().__init__()
77
- self.batch_freq = batch_frequency
78
- self.max_images = max_images
79
- self.log_images_kwargs = log_images_kwargs if log_images_kwargs else {}
80
-
81
- @rank_zero_only
82
- def log_to_logger(self, pl_module, images, split):
83
- for k in images:
84
- grid = torchvision.utils.make_grid(images[k])
85
- grid = (grid + 1.0) / 2.0 # -1,1 -> 0,1; c,h,w
86
-
87
- tag = f"{split}/{k}"
88
- pl_module.logger.experiment.add_image(tag, grid, global_step=pl_module.global_step)
89
-
90
- @rank_zero_only
91
- def log_to_file(self, save_dir, split, images, global_step, current_epoch):
92
- root = os.path.join(save_dir, "images", split)
93
- for k in images:
94
- grid = torchvision.utils.make_grid(images[k], nrow=4)
95
- grid = (grid + 1.0) / 2.0 # -1,1 -> 0,1; c,h,w
96
- grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1)
97
- grid = grid.numpy()
98
- grid = (grid * 255).astype(np.uint8)
99
- filename = "{:06}-{:06}-{}.jpg".format(global_step, current_epoch, k)
100
- path = os.path.join(root, filename)
101
- os.makedirs(os.path.split(path)[0], exist_ok=True)
102
- Image.fromarray(grid).save(path)
103
-
104
- @rank_zero_only
105
- def log_img(self, pl_module, batch, split="train"):
106
- if split == "val": should_log = True
107
- else: should_log = self.check_frequency(pl_module.global_step)
108
-
109
- if should_log:
110
- is_train = pl_module.training
111
- if is_train: pl_module.eval()
112
-
113
- with torch.no_grad():
114
- images = pl_module.log_images(batch, split=split, **self.log_images_kwargs)
115
-
116
- for k in images:
117
- N = min(images[k].shape[0], self.max_images)
118
- images[k] = images[k][:N]
119
- if isinstance(images[k], torch.Tensor):
120
- images[k] = images[k].detach().cpu()
121
- images[k] = torch.clamp(images[k], -1., 1.)
122
-
123
- self.log_to_file(pl_module.logger.save_dir, split, images, pl_module.global_step, pl_module.current_epoch)
124
- # self.log_to_logger(pl_module, images, split)
125
-
126
- if is_train: pl_module.train()
127
-
128
- def check_frequency(self, check_idx):
129
- if (check_idx % self.batch_freq) == 0 and check_idx > 0:
130
- return True
131
- else:
132
- return False
133
-
134
- def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
135
- self.log_img(pl_module, batch, split="train")
136
-
137
- @rank_zero_only
138
- def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx=0):
139
- # print('validation ....')
140
- # print(dataloader_idx)
141
- # print(batch_idx)
142
- if batch_idx==0: self.log_img(pl_module, batch, split="val")
143
-
144
- class CUDACallback(Callback):
145
- # see https://github.com/SeanNaren/minGPT/blob/master/mingpt/callback.py
146
- def on_train_epoch_start(self, trainer, pl_module):
147
- # Reset the memory use counter
148
- torch.cuda.reset_peak_memory_stats(trainer.strategy.root_device.index)
149
- torch.cuda.synchronize(trainer.strategy.root_device.index)
150
- self.start_time = time.time()
151
-
152
- def on_train_epoch_end(self, trainer, pl_module):
153
- torch.cuda.synchronize(trainer.strategy.root_device.index)
154
- max_memory = torch.cuda.max_memory_allocated(trainer.strategy.root_device.index) / 2 ** 20
155
- epoch_time = time.time() - self.start_time
156
-
157
- try:
158
- max_memory = trainer.strategy.reduce(max_memory)
159
- epoch_time = trainer.strategy.reduce(epoch_time)
160
-
161
- rank_zero_info(f"Average Epoch time: {epoch_time:.2f} seconds")
162
- rank_zero_info(f"Average Peak memory {max_memory:.2f}MiB")
163
- except AttributeError:
164
- pass
165
-
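- # get_node_name: if `name` starts with `parent_name` (and is longer), return (True, remaining suffix); otherwise (False, '')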
166
- def get_node_name(name, parent_name):
167
- if len(name) <= len(parent_name):
168
- return False, ''
169
- p = name[:len(parent_name)]
170
- if p != parent_name:
171
- return False, ''
172
- return True, name[len(parent_name):]
173
-
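- # ResumeCallBacks: when resuming, point the LightningOptimizer wrapper's param_groups at the underlying optimizer's param_groups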
174
- class ResumeCallBacks(Callback):
175
- def on_train_start(self, trainer, pl_module):
176
- pl_module.optimizers().param_groups = pl_module.optimizers()._optimizer.param_groups
177
-
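- # load_pretrain_stable_diffusion: load pretrained Stable Diffusion weights into new_model (non-strict); if the first conv layer
- # expects extra condition channels, zero-init them and copy the pretrained weights into the first 4 channels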
178
- def load_pretrain_stable_diffusion(new_model, finetune_from):
179
- rank_zero_print(f"Attempting to load state from {finetune_from}")
180
- old_state = torch.load(finetune_from, map_location="cpu")
181
- if "state_dict" in old_state: old_state = old_state["state_dict"]
182
-
183
- in_filters_load = old_state["model.diffusion_model.input_blocks.0.0.weight"]
184
- new_state = new_model.state_dict()
185
- if "model.diffusion_model.input_blocks.0.0.weight" in new_state:
186
- in_filters_current = new_state["model.diffusion_model.input_blocks.0.0.weight"]
187
- in_shape = in_filters_current.shape
188
- ## the model takes additional inputs as conditions, so its first conv layer has more input channels than the pretrained one.
189
- if in_shape != in_filters_load.shape:
190
- input_keys = ["model.diffusion_model.input_blocks.0.0.weight", "model_ema.diffusion_modelinput_blocks00weight",]
191
- for input_key in input_keys:
192
- if input_key not in old_state or input_key not in new_state:
193
- continue
194
- input_weight = new_state[input_key]
195
- if input_weight.size() != old_state[input_key].size():
196
- print(f"Manual init: {input_key}")
197
- input_weight.zero_()
198
- input_weight[:, :4, :, :].copy_(old_state[input_key])
199
- old_state[input_key] = torch.nn.parameter.Parameter(input_weight)
200
-
201
- new_model.load_state_dict(old_state, strict=False)
202
-
203
- def get_optional_dict(name, config):
204
- if name in config:
205
- cfg = config[name]
206
- else:
207
- cfg = OmegaConf.create()
208
- return cfg
209
-
210
- if __name__ == "__main__":
211
- # now = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
212
- sys.path.append(os.getcwd())
213
- opt = get_parser().parse_args()
214
-
215
- assert opt.base != ''
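- # the experiment name is the config filename without its extension; it names the log and checkpoint directories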
216
- name = os.path.split(opt.base)[-1]
217
- name = os.path.splitext(name)[0]
218
- logdir = os.path.join(opt.logdir, name)
219
-
220
- # logdir stores configs and logs; checkpoints go to a separate ckptdir
221
- ckptdir = os.path.join(opt.ckptdir, name)
222
- cfgdir = os.path.join(logdir, "configs")
223
-
224
- if opt.resume:
225
- ckpt = os.path.join(ckptdir, "last.ckpt")
226
- opt.resume_from_checkpoint = ckpt
227
- opt.finetune_from = "" # disable finetune checkpoint
228
-
229
- seed_everything(opt.seed)
230
-
231
- ###################config#####################
232
- config = OmegaConf.load(opt.base) # load default configs
233
- lightning_config = config.lightning
234
- trainer_config = config.lightning.trainer
235
- for k in trainer_args(opt): # overwrite trainer configs
236
- trainer_config[k] = getattr(opt, k)
237
-
238
- ###################trainer#####################
239
- # training framework
240
- gpuinfo = trainer_config["gpus"]
241
- rank_zero_print(f"Running on GPUs {gpuinfo}")
242
- ngpu = len(trainer_config.gpus.strip(",").split(','))
243
- trainer_config['devices'] = ngpu
244
-
245
- ###################model#####################
246
- model = instantiate_from_config(config.model)
247
- model.cpu()
248
- # load stable diffusion parameters
249
- if opt.finetune_from != "":
250
- load_pretrain_stable_diffusion(model, opt.finetune_from)
251
-
252
- ###################logger#####################
253
- # default logger configs
254
- default_logger_cfg = {"target": "pytorch_lightning.loggers.TensorBoardLogger",
255
- "params": {"save_dir": logdir, "name": "tensorboard_logs", }}
256
- logger_cfg = OmegaConf.create(default_logger_cfg)
257
- logger = instantiate_from_config(logger_cfg)
258
-
259
- ###################callbacks#####################
260
- # default ckpt callbacks
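- # two ModelCheckpoint callbacks: the default one tracks last.ckpt, while the "repeat" one keeps a separate snapshot every 5000 steps (save_top_k=-1)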
261
- default_modelckpt_cfg = {"target": "pytorch_lightning.callbacks.ModelCheckpoint",
262
- "params": {"dirpath": ckptdir, "filename": "{epoch:06}", "verbose": True, "save_last": True, "every_n_train_steps": 5000}}
263
- modelckpt_cfg = OmegaConf.merge(default_modelckpt_cfg, get_optional_dict("modelcheckpoint", lightning_config)) # overwrite checkpoint configs
264
- default_modelckpt_cfg_repeat = {"target": "pytorch_lightning.callbacks.ModelCheckpoint",
265
- "params": {"dirpath": ckptdir, "filename": "{step:08}", "verbose": True, "save_last": False, "every_n_train_steps": 5000, "save_top_k": -1}}
266
- modelckpt_cfg_repeat = OmegaConf.merge(default_modelckpt_cfg_repeat)
267
-
268
- # default callbacks: set up the log directory, monitor the learning rate, and report CUDA stats
269
- default_callbacks_cfg = {
270
- "setup_callback": {
271
- "target": "train_syncdreamer.SetupCallback",
272
- "params": {"resume": opt.resume, "logdir": logdir, "ckptdir": ckptdir, "cfgdir": cfgdir, "config": config}
273
- },
274
- "learning_rate_logger": {
275
- "target": "train_syncdreamer.LearningRateMonitor",
276
- "params": {"logging_interval": "step"}
277
- },
278
- "cuda_callback": {"target": "train_syncdreamer.CUDACallback"},
279
- }
280
- callbacks_cfg = OmegaConf.merge(default_callbacks_cfg, get_optional_dict("callbacks", lightning_config))
281
- callbacks_cfg['model_ckpt'] = modelckpt_cfg # add checkpoint
282
- callbacks_cfg['model_ckpt_repeat'] = modelckpt_cfg_repeat # add checkpoint
283
- callbacks = [instantiate_from_config(callbacks_cfg[k]) for k in callbacks_cfg] # construct all callbacks
284
- if opt.resume:
285
- callbacks.append(ResumeCallBacks())
286
-
287
- trainer = Trainer.from_argparse_args(args=argparse.Namespace(), **trainer_config,
288
- accelerator='cuda', strategy=DDPStrategy(find_unused_parameters=False), logger=logger, callbacks=callbacks)
289
- trainer.logdir = logdir
290
-
291
- ###################data#####################
292
- config.data.params.seed = opt.seed
293
- data = instantiate_from_config(config.data)
294
- data.prepare_data()
295
- data.setup('fit')
296
-
297
- ####################lr#####################
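- # the base learning rate from the config is used directly; no scaling by GPU count, batch size, or gradient accumulation is applied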
298
- bs, base_lr = config.data.params.batch_size, config.model.base_learning_rate
299
- accumulate_grad_batches = trainer_config.accumulate_grad_batches if hasattr(trainer_config, "accumulate_grad_batches") else 1
300
- rank_zero_print(f"accumulate_grad_batches = {accumulate_grad_batches}")
301
- model.learning_rate = base_lr
302
- rank_zero_print("++++ NOT USING LR SCALING ++++")
303
- rank_zero_print(f"Setting learning rate to {model.learning_rate:.2e}")
304
- model.image_dir = logdir # directory used to save images output during training
305
-
306
- # run
307
- trainer.fit(model, data)