Spaces:

lemonaddie
/

geowizard

Build error

App Files Files Community

geowizard / app.py

lemonaddie

Update app.py

9ecd159 verified 8 months ago

raw

history blame

7 kB

	import functools
	import os
	import shutil
	import sys
	import git

	import gradio as gr
	import numpy as np
	import torch as torch
	from PIL import Image

	from gradio_imageslider import ImageSlider

	import spaces

	import fire

	import argparse
	import os
	import logging

	# cupy is an optional dependency: keep starting up without it, but do not
	# swallow KeyboardInterrupt/SystemExit the way a bare ``except:`` would.
	try:
	    import cupy
	except Exception:
	    print('import cupy failed!')

	import numpy as np
	import torch
	from PIL import Image
	from tqdm.auto import tqdm
	import glob
	import json
	import cv2

	import sys
	sys.path.append("../")
	from models.geowizard_pipeline import DepthNormalEstimationPipeline
	from utils.seed_all import seed_all
	import matplotlib.pyplot as plt
	from utils.de_normalized import align_scale_shift
	from utils.depth2normal import *

	from diffusers import DiffusionPipeline, DDIMScheduler, AutoencoderKL
	from models.unet_2d_condition import UNet2DConditionModel

	from transformers import CLIPTextModel, CLIPTokenizer
	from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
	import torchvision.transforms.functional as TF
	from torchvision.transforms import InterpolationMode

	# Run on the GPU when one is visible to torch, otherwise fall back to CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# VAE and scheduler are loaded from the Stable Diffusion 2.1 unCLIP repository.
	stable_diffusion_repo_path = "stabilityai/stable-diffusion-2-1-unclip"
	vae = AutoencoderKL.from_pretrained(stable_diffusion_repo_path, subfolder='vae')
	scheduler = DDIMScheduler.from_pretrained(stable_diffusion_repo_path, subfolder='scheduler')

	# CLIP image encoder and its preprocessor come from the image-variations repository.
	sd_image_variations_diffusers_path = 'lambdalabs/sd-image-variations-diffusers'
	image_encoder = CLIPVisionModelWithProjection.from_pretrained(
	    sd_image_variations_diffusers_path, subfolder="image_encoder")
	feature_extractor = CLIPImageProcessor.from_pretrained(
	    sd_image_variations_diffusers_path, subfolder="feature_extractor")

	# The UNet weights are read from the ``unet`` subfolder of the working directory.
	unet = UNet2DConditionModel.from_pretrained('.', subfolder="unet")

	pipe = DepthNormalEstimationPipeline(
	    vae=vae,
	    image_encoder=image_encoder,
	    feature_extractor=feature_extractor,
	    unet=unet,
	    scheduler=scheduler,
	)

	# xformers attention is an optional optimisation: keep running without it,
	# but record why it was skipped and never swallow KeyboardInterrupt/SystemExit.
	try:
	    import xformers  # noqa: F401  (imported only to probe availability)
	    pipe.enable_xformers_memory_efficient_attention()
	except Exception as exc:
	    logging.warning("Running without xformers memory-efficient attention: %s", exc)

	pipe = pipe.to(device)

	@spaces.GPU
	def depth_normal(img,
	                 denoising_steps,
	                 ensemble_size,
	                 processing_res,
	                 seed,
	                 domain):
	    """Run the module-level ``pipe`` on one image and return its colored outputs.

	    Args:
	        img: Input image as delivered by the ``gr.Image`` input component.
	        denoising_steps: Number of denoising steps forwarded to the pipeline.
	        ensemble_size: Ensemble size forwarded to the pipeline.
	        processing_res: Processing resolution forwarded to the pipeline.
	        seed: Random seed; a negative or missing value leaves torch's RNG unseeded.
	        domain: Data-type selector forwarded to the pipeline.

	    Returns:
	        Tuple ``(depth_colored, normal_colored)`` taken from the pipeline output.

	    Raises:
	        gr.Error: If no input image was provided.
	    """
	    # Clicking "Generate" without an image would otherwise fail deep inside
	    # the pipeline with an opaque error; surface a readable message instead.
	    if img is None:
	        raise gr.Error("Please upload an input image first.")

	    # A cleared gr.Number field arrives as None; treat it like a negative
	    # seed ("not specified") instead of crashing in int(None).
	    seed = -1 if seed is None else int(seed)
	    if seed >= 0:
	        torch.manual_seed(seed)

	    pipe_out = pipe(
	        img,
	        # Sliders may deliver floats on some Gradio versions (TODO confirm);
	        # step counts must be integers.
	        denoising_steps=int(denoising_steps),
	        ensemble_size=int(ensemble_size),
	        processing_res=processing_res,
	        # NOTE(review): 0 presumably lets the pipeline pick its own batch
	        # size -- confirm in DepthNormalEstimationPipeline.
	        batch_size=0,
	        domain=domain,
	        show_progress_bar=True,
	    )

	    return pipe_out.depth_colored, pipe_out.normal_colored



	def run_demo():
	    """Build the Gradio Blocks interface for the demo and launch it.

	    The page has a title/description header, an input-image column with
	    clickable examples, an options column with the "Generate" button, and two
	    output images wired to ``depth_normal``.
	    """
	    # NOTE(review): the layout nesting below was reconstructed from the flat
	    # source (its indentation was lost) -- confirm it against the deployed app.
	    custom_theme = gr.themes.Soft(primary_hue="blue").set(
	        button_secondary_background_fill="*neutral_100",
	        button_secondary_background_fill_hover="*neutral_200")
	    custom_css = '''#disp_image {
	text-align: center; /* Horizontally center the content */
	}'''

	    _TITLE = '''GeoWizard: Unleashing the Diffusion Priors for 3D Geometry Estimation from a Single Image'''
	    _DESCRIPTION = '''
	<div>
	Generate consistent depth and normal from single image. High quality and rich details. (PS: We find the demo running on ZeroGPU output slightly inferior results compared to A100 or 3060 with everything exactly the same.)
	<a style="display:inline-block; margin-left: .5em" href='https://github.com/fuxiao0719/GeoWizard/'><img src='https://img.shields.io/github/stars/fuxiao0719/GeoWizard?style=social' /></a>
	</div>
	'''

	    with gr.Blocks(title=_TITLE, theme=custom_theme, css=custom_css) as demo:
	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown('# ' + _TITLE)
	        gr.Markdown(_DESCRIPTION)
	        with gr.Row(variant='panel'):
	            with gr.Column(scale=1):
	                input_image = gr.Image(type='pil', image_mode='RGBA', height=320, label='Input image')

	                # Example images live in ./files next to this script. Sort the
	                # listing so the gallery order is deterministic, and skip the
	                # gallery when the folder is missing or empty instead of crashing.
	                example_folder = os.path.join(os.path.dirname(__file__), "./files")
	                if os.path.isdir(example_folder):
	                    example_fns = sorted(
	                        os.path.join(example_folder, example)
	                        for example in os.listdir(example_folder)
	                    )
	                else:
	                    example_fns = []
	                if example_fns:
	                    gr.Examples(
	                        examples=example_fns,
	                        inputs=[input_image],
	                        cache_examples=False,
	                        label='Examples (click one of the images below to start)',
	                        examples_per_page=30
	                    )
	            with gr.Column(scale=1):

	                with gr.Accordion('Advanced options', open=True):
	                    with gr.Column():

	                        domain = gr.Radio(
	                            [
	                                ("Outdoor", "outdoor"),
	                                ("Indoor", "indoor"),
	                                ("Object", "object"),
	                            ],
	                            label="Data Type (Must Select One matches your image)",
	                            value="indoor",
	                        )
	                        denoising_steps = gr.Slider(
	                            label="Number of denoising steps (More steps, better quality)",
	                            minimum=1,
	                            maximum=50,
	                            step=1,
	                            value=10,
	                        )
	                        ensemble_size = gr.Slider(
	                            label="Ensemble size (More steps, higher accuracy)",
	                            minimum=1,
	                            maximum=15,
	                            step=1,
	                            value=3,
	                        )
	                        seed = gr.Number(0, label='Random Seed. Negative values for not specifying')

	                        processing_res = gr.Radio(
	                            [
	                                ("Native", 0),
	                                ("Recommended", 768),
	                            ],
	                            label="Processing resolution",
	                            value=768,
	                        )

	                run_btn = gr.Button('Generate', variant='primary', interactive=True)
	        with gr.Row():
	            with gr.Column():
	                depth = gr.Image(interactive=False, show_label=False)
	            with gr.Column():
	                normal = gr.Image(interactive=False, show_label=False)

	        # The input order must match the positional parameters of depth_normal.
	        run_btn.click(fn=depth_normal,
	                      inputs=[input_image, denoising_steps,
	                              ensemble_size,
	                              processing_res,
	                              seed,
	                              domain],
	                      outputs=[depth, normal]
	                      )
	    demo.queue().launch(share=True, max_threads=80)


	# Script entry point: expose run_demo through the python-fire command line
	# wrapper only when this file is executed directly.
	if __name__ == "__main__":
	    fire.Fire(run_demo)