lemonaddie committed fbf7415 (parent: 47e2130)
Upload app_recon.py

app_recon.py — ADDED (+295, −0)
@@ -0,0 +1,295 @@
import functools
import os
import shutil
import sys
import git

import gradio as gr
import numpy as np
import torch
from PIL import Image

from gradio_imageslider import ImageSlider
from bilateral_normal_integration.bilateral_normal_integration_cupy import bilateral_normal_integration_function

import spaces

import fire

import argparse
import logging

from tqdm.auto import tqdm
import glob
import json
import cv2

from rembg import remove
from segment_anything import sam_model_registry, SamPredictor
from datetime import datetime
import time

# Local GeoWizard modules live one directory up.
sys.path.append("../")
from models.geowizard_pipeline import DepthNormalEstimationPipeline
from utils.seed_all import seed_all
import matplotlib.pyplot as plt
from utils.de_normalized import align_scale_shift
from utils.depth2normal import *

from diffusers import DiffusionPipeline, DDIMScheduler, AutoencoderKL
from models.unet_2d_condition import UNet2DConditionModel

from transformers import CLIPTextModel, CLIPTokenizer
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
import torchvision.transforms.functional as TF
from torchvision.transforms import InterpolationMode

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assemble the GeoWizard depth/normal estimation pipeline from its pretrained components.
stable_diffusion_repo_path = "stabilityai/stable-diffusion-2-1-unclip"
vae = AutoencoderKL.from_pretrained(stable_diffusion_repo_path, subfolder='vae')
scheduler = DDIMScheduler.from_pretrained(stable_diffusion_repo_path, subfolder='scheduler')
sd_image_variations_diffusers_path = 'lambdalabs/sd-image-variations-diffusers'
image_encoder = CLIPVisionModelWithProjection.from_pretrained(sd_image_variations_diffusers_path, subfolder="image_encoder")
feature_extractor = CLIPImageProcessor.from_pretrained(sd_image_variations_diffusers_path, subfolder="feature_extractor")
unet = UNet2DConditionModel.from_pretrained('.', subfolder="unet")

pipe = DepthNormalEstimationPipeline(vae=vae,
                                     image_encoder=image_encoder,
                                     feature_extractor=feature_extractor,
                                     unet=unet,
                                     scheduler=scheduler)

try:
    import xformers
    pipe.enable_xformers_memory_efficient_attention()
except Exception:
    pass  # run without xformers

pipe = pipe.to(device)

def sam_init():
    # Load the Segment Anything (ViT-L) checkpoint used for foreground masking.
    sam_checkpoint = os.path.join(os.path.dirname(__file__), "sam_pt", "sam_vit_l_0b3195.pth")
    model_type = "vit_l"

    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint).to(device="cuda")
    predictor = SamPredictor(sam)
    return predictor

sam_predictor = sam_init()
+
def sam_segment(predictor, input_image, *bbox_coords):
|
88 |
+
bbox = np.array(bbox_coords)
|
89 |
+
image = np.asarray(input_image)
|
90 |
+
|
91 |
+
start_time = time.time()
|
92 |
+
predictor.set_image(image)
|
93 |
+
|
94 |
+
masks_bbox, scores_bbox, logits_bbox = predictor.predict(
|
95 |
+
box=bbox,
|
96 |
+
multimask_output=True
|
97 |
+
)
|
98 |
+
|
99 |
+
print(f"SAM Time: {time.time() - start_time:.3f}s")
|
100 |
+
out_image = np.zeros((image.shape[0], image.shape[1], 4), dtype=np.uint8)
|
101 |
+
out_image[:, :, :3] = image
|
102 |
+
out_image_bbox = out_image.copy()
|
103 |
+
out_image_bbox[:, :, 3] = masks_bbox[-1].astype(np.uint8) * 255
|
104 |
+
torch.cuda.empty_cache()
|
105 |
+
return Image.fromarray(out_image_bbox, mode='RGBA'), masks_bbox
|
106 |
+
|
107 |
+
@spaces.GPU
|
108 |
+
def depth_normal(img_path,
|
109 |
+
denoising_steps,
|
110 |
+
ensemble_size,
|
111 |
+
processing_res,
|
112 |
+
seed,
|
113 |
+
domain):
|
114 |
+
|
115 |
+
seed = int(seed)
|
116 |
+
if seed >= 0:
|
117 |
+
torch.manual_seed(seed)
|
118 |
+
|
119 |
+
img = Image.open(img_path)
|
120 |
+
|
121 |
+
pipe_out = pipe(
|
122 |
+
img,
|
123 |
+
denoising_steps=denoising_steps,
|
124 |
+
ensemble_size=ensemble_size,
|
125 |
+
processing_res=processing_res,
|
126 |
+
batch_size=0,
|
127 |
+
domain=domain,
|
128 |
+
show_progress_bar=True,
|
129 |
+
)
|
130 |
+
|
131 |
+
depth_colored = pipe_out.depth_colored
|
132 |
+
normal_colored = pipe_out.normal_colored
|
133 |
+
|
134 |
+
depth_np = pipe_out.depth_np
|
135 |
+
normal_np = pipe_out.normal_np
|
136 |
+
|
137 |
+
path_output_dir = os.path.splitext(os.path.basename(img_path))[0] + datetime.now().strftime('%Y%m%d-%H%M%S')
|
138 |
+
os.makedirs(path_output_dir, exist_ok=True)
|
139 |
+
|
140 |
+
name_base = os.path.splitext(os.path.basename(img_path))[0]
|
141 |
+
depth_path = os.path.join(path_output_dir, f"{name_base}_depth.npy")
|
142 |
+
normal_path = os.path.join(path_output_dir, f"{name_base}_normal.npy")
|
143 |
+
|
144 |
+
np.save(normal_path, normal_np)
|
145 |
+
np.save(depth_path, depth_np)
|
146 |
+
|
147 |
+
return depth_colored, normal_colored, [depth_path, normal_path]
|
148 |
+
|

def reconstruction(image, files):

    torch.cuda.empty_cache()

    img = Image.open(image)

    # Remove the background, then use the bounding box of the remaining alpha as the SAM prompt.
    image_rem = img.convert('RGBA')
    image_nobg = remove(image_rem, alpha_matting=True)
    arr = np.asarray(image_nobg)[:, :, -1]
    x_nonzero = np.nonzero(arr.sum(axis=0))
    y_nonzero = np.nonzero(arr.sum(axis=1))
    x_min = int(x_nonzero[0].min())
    y_min = int(y_nonzero[0].min())
    x_max = int(x_nonzero[0].max())
    y_max = int(y_nonzero[0].max())
    masked_image, mask = sam_segment(sam_predictor, img.convert('RGB'), x_min, y_min, x_max, y_max)

    depth_np = np.load(files[0])
    normal_np = np.load(files[1])

    dir_name = os.path.dirname(os.path.realpath(files[0]))
    mask_output_temp = mask[-1]
    name_base = os.path.splitext(os.path.basename(files[0]))[0][:-6]  # strip the "_depth" suffix

    # Integrate the normal map into a surface with bilateral normal integration (BiNI).
    normal_np[:, :, 0] *= -1
    _, surface, _, _, _ = bilateral_normal_integration_function(normal_np, mask_output_temp, k=2, K=None, max_iter=100, tol=1e-4, cg_max_iter=5000, cg_tol=1e-3)
    ply_path = os.path.join(dir_name, f"{name_base}_mask.ply")
    surface.save(ply_path, binary=False)
    return ply_path

def run_demo():

    custom_theme = gr.themes.Soft(primary_hue="blue").set(
        button_secondary_background_fill="*neutral_100",
        button_secondary_background_fill_hover="*neutral_200")
    custom_css = '''#disp_image {
        text-align: center; /* Horizontally center the content */
    }'''

    _TITLE = '''GeoWizard: Unleashing the Diffusion Priors for 3D Geometry Estimation from a Single Image'''
    _DESCRIPTION = '''
<div>
Generate consistent depth and normal maps from a single image, with high quality and rich detail. (PS: we find that the demo running on ZeroGPU outputs slightly inferior results compared to an A100 or 3060 with everything else exactly the same.)
<a style="display:inline-block; margin-left: .5em" href='https://github.com/fuxiao0719/GeoWizard/'><img src='https://img.shields.io/github/stars/fuxiao0719/GeoWizard?style=social' /></a>
</div>
'''
    _GPU_ID = 0

    with gr.Blocks(title=_TITLE, theme=custom_theme, css=custom_css) as demo:
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown('# ' + _TITLE)
                gr.Markdown(_DESCRIPTION)
        with gr.Row(variant='panel'):
            with gr.Column(scale=1):
                input_image = gr.Image(type='filepath', height=320, label='Input image')

                example_folder = os.path.join(os.path.dirname(__file__), "./files")
                example_fns = [os.path.join(example_folder, example) for example in os.listdir(example_folder)]
                gr.Examples(
                    examples=example_fns,
                    inputs=[input_image],
                    cache_examples=False,
                    label='Examples (click one of the images below to start)',
                    examples_per_page=30
                )
            with gr.Column(scale=1):

                with gr.Accordion('Advanced options', open=True):
                    with gr.Column():

                        domain = gr.Radio(
                            [
                                ("Outdoor", "outdoor"),
                                ("Indoor", "indoor"),
                                ("Object", "object"),
                            ],
                            label="Data type (select the one that matches your image)",
                            value="indoor",
                        )
                        denoising_steps = gr.Slider(
                            label="Number of denoising steps (more steps, better quality)",
                            minimum=1,
                            maximum=50,
                            step=1,
                            value=10,
                        )
                        ensemble_size = gr.Slider(
                            label="Ensemble size (larger ensemble, higher accuracy)",
                            minimum=1,
                            maximum=15,
                            step=1,
                            value=3,
                        )
                        seed = gr.Number(0, label='Random seed (negative values leave the seed unspecified)')

                        processing_res = gr.Radio(
                            [
                                ("Native", 0),
                                ("Recommended", 768),
                            ],
                            label="Processing resolution",
                            value=768,
                        )

                run_btn = gr.Button('Generate', variant='primary', interactive=True)
        with gr.Row():
            with gr.Column():
                depth = gr.Image(interactive=False, show_label=False)
            with gr.Column():
                normal = gr.Image(interactive=False, show_label=False)

        with gr.Row():
            files = gr.Files(
                label="Depth and normal maps (NumPy)",
                elem_id="download",
                interactive=False,
            )

        with gr.Row():
            recon_btn = gr.Button('Is there a salient foreground object? If yes, click here to reconstruct its 3D model.', variant='primary', interactive=True)

        with gr.Row():
            reconstructed_3d = gr.Model3D(
                label='BiNI post-processed 3D model', height=320, interactive=False,
            )

        # Wire up the two stages: depth/normal estimation, then optional BiNI reconstruction.
        run_btn.click(fn=depth_normal,
                      inputs=[input_image, denoising_steps,
                              ensemble_size,
                              processing_res,
                              seed,
                              domain],
                      outputs=[depth, normal, files]
                      )
        recon_btn.click(fn=reconstruction,
                        inputs=[input_image, files],
                        outputs=[reconstructed_3d]
                        )
    demo.queue().launch(share=True, max_threads=80)


if __name__ == '__main__':
    fire.Fire(run_demo)
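
Note: the .npy files returned by depth_normal() can also be reused outside the demo. Below is a minimal offline sketch, not part of the app itself; the file names are hypothetical placeholders for the files saved above, and the all-ones mask merely stands in for the SAM mask that reconstruction() computes.

import numpy as np
from bilateral_normal_integration.bilateral_normal_integration_cupy import bilateral_normal_integration_function

# Hypothetical paths: substitute the *_depth.npy / *_normal.npy files produced by depth_normal().
normal_np = np.load("example_normal.npy")   # (H, W, 3) normal map
depth_np = np.load("example_depth.npy")     # (H, W) depth map, not needed for BiNI itself

mask = np.ones(normal_np.shape[:2], dtype=bool)  # placeholder mask covering the whole image
normal_np[:, :, 0] *= -1                         # same axis flip as in reconstruction()

# Same BiNI call and parameters as reconstruction() above.
_, surface, _, _, _ = bilateral_normal_integration_function(
    normal_np, mask, k=2, K=None, max_iter=100, tol=1e-4, cg_max_iter=5000, cg_tol=1e-3)
surface.save("example.ply", binary=False)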