Lotus_Depth_video

Paused

App Files Files Community

ghostsInTheMachine committed 16 days ago

Commit

c12e34c

•

1 Parent(s): 7bb1989

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -33

app.py CHANGED Viewed

@@ -1,23 +1,72 @@
 import gradio as gr
 import torch
-import os
 import tempfile
-import shutil
 import time
 import ffmpeg
-import numpy as np
-from PIL import Image
 from concurrent.futures import ThreadPoolExecutor
-import moviepy.editor as mp
 from infer import lotus  # Import the depth model inference function
-import spaces
-# Set device to use the L40s GPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Add the preprocess_video function to limit video resolution and frame rate
-def preprocess_video(video_path, target_fps=24, max_resolution=(1920, 1080)):
-    """Preprocess the video to resize and reduce its frame rate."""
     video = mp.VideoFileClip(video_path)
     # Resize video if it's larger than the target resolution
@@ -29,8 +78,11 @@ def preprocess_video(video_path, target_fps=24, max_resolution=(1920, 1080)):
     return video
-def process_frame(frame, seed=0):
-    """Process a single frame through the depth model and return depth map."""
     try:
         # Convert frame to PIL Image
         image = Image.fromarray(frame)
@@ -39,7 +91,7 @@ def process_frame(frame, seed=0):
         with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
             image.save(tmp.name)
-            # Process through the depth model (lotus)
             _, output_d = lotus(tmp.name, 'depth', seed, device)
             # Clean up temp file
@@ -54,10 +106,14 @@ def process_frame(frame, seed=0):
         return None
 @spaces.GPU
-def process_video(video_path, fps=0, seed=0, max_workers=32):
-    """Process video, batch frames, and use L40s GPU to generate depth maps."""
     temp_dir = None
     try:
         start_time = time.time()
         # Preprocess the video
@@ -77,13 +133,11 @@ def process_video(video_path, fps=0, seed=0, max_workers=32):
         frames_dir = os.path.join(temp_dir, "frames")
         os.makedirs(frames_dir, exist_ok=True)
-        # Process frames in larger batches (based on GPU VRAM)
-        batch_size = 50  # Increased batch size to fully utilize the GPU's capabilities
         processed_frames = []
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
-            for i in range(0, total_frames, batch_size):
-                futures = [executor.submit(process_frame, frames[j], seed) for j in range(i, min(i + batch_size, total_frames))]
                 for j, future in enumerate(futures):
                     try:
                         result = future.result()
@@ -95,10 +149,12 @@ def process_video(video_path, fps=0, seed=0, max_workers=32):
                             # Collect processed frame for preview
                             processed_frames.append(result)
-                            # Update preview (only showing every 10th frame to avoid clutter)
-                            if (i + j + 1) % 10 == 0:
-                                elapsed_time = time.time() - start_time
-                                yield processed_frames[-1], None, None, f"Processed {i+j+1}/{total_frames} frames... Elapsed: {elapsed_time:.2f}s"
                     except Exception as e:
                         print(f"Error processing frame {i + j + 1}: {e}")
@@ -113,6 +169,7 @@ def process_video(video_path, fps=0, seed=0, max_workers=32):
         shutil.make_archive(zip_path[:-4], 'zip', frames_dir)
         # Create MP4 video
         video_filename = f"depth_video_{int(time.time())}.mp4"
         video_path = os.path.join(output_dir, video_filename)
@@ -153,7 +210,7 @@ def process_video(video_path, fps=0, seed=0, max_workers=32):
             except Exception as e:
                 print(f"Error cleaning up temp directory: {e}")
-def process_wrapper(video, fps=0, seed=0, max_workers=32):
     if video is None:
         raise gr.Error("Please upload a video.")
     try:
@@ -197,7 +254,7 @@ custom_css = """
 """
 # Gradio Interface
-with gr.Blocks(css=custom_css) as demo:
     gr.HTML('''
         <div class="title-container">
             <div id="title">Video Depth Estimation</div>
@@ -206,10 +263,36 @@ with gr.Blocks(css=custom_css) as demo:
     with gr.Row():
         with gr.Column():
-            video_input = gr.Video(label="Upload Video", interactive=True, show_label=True)
-            fps_slider = gr.Slider(minimum=0, maximum=60, step=1, value=0, label="Output FPS")
-            seed_slider = gr.Slider(minimum=0, maximum=999999999, step=1, value=0, label="Seed")
-            max_workers_slider = gr.Slider(minimum=1, maximum=32, step=1, value=32, label="Max Workers")
             btn = gr.Button("Process Video", elem_id="submit-button")
         with gr.Column():
@@ -218,12 +301,39 @@ with gr.Blocks(css=custom_css) as demo:
             output_video = gr.File(label="Download Video (MP4)")
             time_textbox = gr.Textbox(label="Status", interactive=False)
-    btn.click(fn=process_wrapper
-, inputs=[video_input, fps_slider, seed_slider, max_workers_slider],
-              outputs=[preview_image, output_frames_zip, output_video, time_textbox])
     demo.queue()
 if __name__ == "__main__":
-    demo.launch(debug=True)

 import gradio as gr
 import torch
+import spaces
+import moviepy.editor as mp
+from PIL import Image
+import numpy as np
 import tempfile
 import time
+import os
+import shutil
 import ffmpeg
 from concurrent.futures import ThreadPoolExecutor
+from gradio.themes.base import Base
+from gradio.themes.utils import colors, fonts
 from infer import lotus  # Import the depth model inference function
+# Custom Theme Definition
+class WhiteTheme(Base):
+    """Gradio theme with white backgrounds and black text.
+
+    Extends ``Base`` with an orange primary hue and the Inter Google font as
+    defaults, then overrides background, text, border and shadow variables
+    via ``self.set``. Every ``*_dark`` variable set here receives the same
+    value as its light counterpart.
+    """
+    def __init__(
+        self,
+        *,
+        primary_hue: colors.Color | str = colors.orange,
+        font: fonts.Font | str | tuple[fonts.Font | str, ...] = (
+            fonts.GoogleFont("Inter"),
+            "ui-sans-serif",
+            "system-ui",
+            "sans-serif",
+        ),
+        # NOTE(review): the monospace stack also starts with Inter and
+        # includes "system-ui" -- confirm this is intentional.
+        font_mono: fonts.Font | str | tuple[fonts.Font | str, ...] = (
+            fonts.GoogleFont("Inter"),
+            "ui-monospace",
+            "system-ui",
+            "monospace",
+        )
+    ):
+        """Build the theme; all arguments are keyword-only.
+
+        Args:
+            primary_hue: Forwarded to ``Base.__init__`` as ``primary_hue``;
+                defaults to ``colors.orange``.
+            font: Forwarded to ``Base.__init__`` as ``font``; defaults to
+                Inter followed by generic sans-serif fallbacks.
+            font_mono: Forwarded to ``Base.__init__`` as ``font_mono``.
+        """
+        super().__init__(
+            primary_hue=primary_hue,
+            font=font,
+            font_mono=font_mono,
+        )
+        # Override individual theme variables. "*primary_50" / "*primary_300"
+        # presumably reference shades of the primary hue -- verify against the
+        # Gradio theming docs.
+        self.set(
+            background_fill_primary="*primary_50",
+            background_fill_secondary="white",
+            border_color_primary="*primary_300",
+            body_background_fill="white",
+            body_background_fill_dark="white",
+            block_background_fill="white",
+            block_background_fill_dark="white",
+            panel_background_fill="white",
+            panel_background_fill_dark="white",
+            body_text_color="black",
+            body_text_color_dark="black",
+            block_label_text_color="black",
+            block_label_text_color_dark="black",
+            block_border_color="white",
+            panel_border_color="white",
+            input_border_color="lightgray",
+            input_background_fill="white",
+            input_background_fill_dark="white",
+            shadow_drop="none"
+        )
+# Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Add the preprocess_video function to limit video resolution and frame rate
+def preprocess_video(video_path, target_fps=24, max_resolution=(640, 360)):
+    """Preprocess the video to reduce its resolution and frame rate."""
     video = mp.VideoFileClip(video_path)
     # Resize video if it's larger than the target resolution
     return video
+def process_frame(frame, seed=0, start_time=None):
+    """
+    Process a single frame through the depth model.
+    Returns the discriminative depth map.
+    """
     try:
         # Convert frame to PIL Image
         image = Image.fromarray(frame)
         with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
             image.save(tmp.name)
+            # Process through lotus model
             _, output_d = lotus(tmp.name, 'depth', seed, device)
             # Clean up temp file
         return None
 @spaces.GPU
+def process_video(video_path, fps=0, seed=0, max_workers=2):
+    """
+    Process video to create depth map sequence and video.
+    Maintains original resolution and framerate if fps=0.
+    """
     temp_dir = None
     try:
+        # Initialize start_time here for use in process_frame
         start_time = time.time()
         # Preprocess the video
         frames_dir = os.path.join(temp_dir, "frames")
         os.makedirs(frames_dir, exist_ok=True)
+        # Process frames in batches of 10
         processed_frames = []
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            for i in range(0, total_frames, 10):  # Process 10 frames at a time
+                futures = [executor.submit(process_frame, frames[j], seed, start_time) for j in range(i, min(i + 10, total_frames))]
                 for j, future in enumerate(futures):
                     try:
                         result = future.result()
                             # Collect processed frame for preview
                             processed_frames.append(result)
+                            # Update preview
+                            elapsed_time = time.time() - start_time
+                            yield processed_frames[-1], None, None, f"Processing frame {i+j+1}/{total_frames}... Elapsed time: {elapsed_time:.2f} seconds"
+                        if (i + j + 1) % 10 == 0:
+                            print(f"Processed {i + j + 1}/{total_frames} frames")
                     except Exception as e:
                         print(f"Error processing frame {i + j + 1}: {e}")
         shutil.make_archive(zip_path[:-4], 'zip', frames_dir)
         # Create MP4 video
+        print("Creating MP4 video...")
         video_filename = f"depth_video_{int(time.time())}.mp4"
         video_path = os.path.join(output_dir, video_filename)
             except Exception as e:
                 print(f"Error cleaning up temp directory: {e}")
+def process_wrapper(video, fps=0, seed=0, max_workers=6):
     if video is None:
         raise gr.Error("Please upload a video.")
     try:
 """
 # Gradio Interface
+with gr.Blocks(css=custom_css, theme=WhiteTheme()) as demo:
     gr.HTML('''
         <div class="title-container">
             <div id="title">Video Depth Estimation</div>
     with gr.Row():
         with gr.Column():
+            video_input = gr.Video(
+                label="Upload Video",
+                interactive=True,
+                show_label=True,
+                height=360,
+                width=640
+            )
+            with gr.Row():
+                fps_slider = gr.Slider(
+                    minimum=0,
+                    maximum=60,
+                    step=1,
+                    value=0,
+                    label="Output FPS (0 will inherit the original fps value)",
+                )
+                seed_slider = gr.Slider(
+                    minimum=0,
+                    maximum=999999999,
+                    step=1,
+                    value=0,
+                    label="Seed",
+                )
+                max_workers_slider = gr.Slider(
+                    minimum=1,
+                    maximum=32,
+                    step=1,
+                    value=6,
+                    label="Max Workers",
+                    info="Determines how many frames to process in parallel"
+                )
             btn = gr.Button("Process Video", elem_id="submit-button")
         with gr.Column():
             output_video = gr.File(label="Download Video (MP4)")
             time_textbox = gr.Textbox(label="Status", interactive=False)
+            gr.Markdown("""
+            ### Output Information
+            - High-quality MP4 video output
+            - Original resolution and framerate are maintained
+            - Frame sequence provided for maximum compatibility
+            """)
+    btn.click(
+        fn=process_wrapper,
+        inputs=[video_input, fps_slider, seed_slider, max_workers_slider],
+        outputs=[preview_image, output_frames_zip, output_video, time_textbox]
+    )
     demo.queue()
+    api = gr.Interface(
+        fn=process_wrapper,
+        inputs=[
+            gr.Video(label="Upload Video"),
+            gr.Number(label="FPS", value=0),
+            gr.Number(label="Seed", value=0),
+            gr.Number(label="Max Workers", value=6)
+        ],
+        outputs=[
+            gr.Image(label="Preview"),
+            gr.File(label="Frame Sequence"),
+            gr.File(label="Video"),
+            gr.Textbox(label="Status")
+        ],
+        title="Video Depth Estimation API",
+        description="Generate depth maps from videos",
+        api_name="/process_video"
+    )
 if __name__ == "__main__":
+    demo.launch(debug=True, show_error=True, share=False, server_name="0.0.0.0", server_port=7860)