File size: 5,307 Bytes
0c09666 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
import gradio as gr
import numpy as np
from PIL import Image
import cv2
from moviepy.editor import VideoFileClip
from share_btn import community_icon_html, loading_icon_html, share_js
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
def convert_mp4_to_frames(video_path, duration=3):
    """Decode the leading ``duration`` seconds of a video into RGB frames.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        duration: Number of seconds to keep, counted from the first frame.
            The frame budget is ``int(fps * duration)``.

    Returns:
        A numpy array built from the decoded frames, each converted from
        BGR to RGB channel order. The array is empty when no frame could
        be read or when the frame budget is not positive.
    """
    video = cv2.VideoCapture(video_path)
    try:
        # The frame budget is derived from the frame rate the capture reports.
        fps = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(fps * duration)

        frames = []
        # Test the budget *before* reading so that no frame is decoded only
        # to be thrown away once the limit has been reached.
        while len(frames) < num_frames:
            ret, frame = video.read()
            if not ret:
                # The capture yielded no further frame: keep what we have.
                break
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture handle even if decoding or conversion raised.
        video.release()

    return np.array(frames)
def infer(prompt, video_in, denoise_strength):
    """Run the zeroscope_v2_XL video-to-video pipeline on an uploaded clip.

    Args:
        prompt: Text prompt passed to the pipeline.
        video_in: Value of the video input; it is forwarded to
            ``convert_mp4_to_frames`` as the video path.
        denoise_strength: Value forwarded to the pipeline as ``strength``.

    Returns:
        A tuple ``(path, update)`` where ``path`` is the exported video file
        ("xl_result.mp4") and ``update`` makes the share-button group visible.

    Raises:
        gr.Error: If no video was supplied or no frame could be decoded, so
            the user sees a readable message instead of a low-level failure.
    """
    if not video_in:
        raise gr.Error("Please upload a video before submitting.")

    negative_prompt = "text, watermark, copyright, blurry, nsfw"
    output_path = "xl_result.mp4"

    # Only the first 3 seconds are processed; every frame is resized to
    # 1024x576 before being handed to the pipeline.
    video = convert_mp4_to_frames(video_in, duration=3)
    if len(video) == 0:
        raise gr.Error("Could not read any frame from the uploaded video.")
    video_resized = [Image.fromarray(frame).resize((1024, 576)) for frame in video]

    # The pipeline is built on every call and discarded afterwards.
    pipe_xl = DiffusionPipeline.from_pretrained(
        "cerspense/zeroscope_v2_XL",
        torch_dtype=torch.float32,
        revision="refs/pr/17",
    )
    pipe_xl.vae.enable_slicing()
    # Swap in the DPM-Solver multistep scheduler, reusing the current config.
    pipe_xl.scheduler = DPMSolverMultistepScheduler.from_config(pipe_xl.scheduler.config)
    pipe_xl.enable_model_cpu_offload()
    pipe_xl.to("cpu")

    video_frames = pipe_xl(
        prompt,
        negative_prompt=negative_prompt,
        video=video_resized,
        strength=denoise_strength,
    ).frames

    # Drop the local reference to the pipeline before exporting the result.
    del pipe_xl

    export_to_video(video_frames, output_video_path=output_path)
    return output_path, gr.Group.update(visible=True)
# Stylesheet handed to gr.Blocks(css=...): it centers the main column
# (#col-container), defines the .animate-spin rotation animation, styles the
# share button and its container, and hides that container when it carries
# the "hidden" class.
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.animate-spin {
animation: spin 1s linear infinite;
}
@keyframes spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
#share-btn-container {
display: flex;
padding-left: 0.5rem !important;
padding-right: 0.5rem !important;
background-color: #000000;
justify-content: center;
align-items: center;
border-radius: 9999px !important;
max-width: 13rem;
}
#share-btn-container:hover {
background-color: #060606;
}
#share-btn {
all: initial;
color: #ffffff;
font-weight: 600;
cursor:pointer;
font-family: 'IBM Plex Sans', sans-serif;
margin-left: 0.5rem !important;
padding-top: 0.5rem !important;
padding-bottom: 0.5rem !important;
right:0;
}
#share-btn * {
all: unset;
}
#share-btn-container div:nth-child(-n+2){
width: auto !important;
min-height: 0px !important;
}
#share-btn-container .wrap {
display: none !important;
}
#share-btn-container.hidden {
display: none!important;
}
img[src*='#center'] {
display: block;
margin: auto;
}
"""
# Introductory banner shown at the top of the page (title, usage notes and
# the "Duplicate this Space" badge).
INTRO_MARKDOWN = """
<h1 style="text-align: center;">Zeroscope XL</h1>
<p style="text-align: center;">
This space is specifically designed for upscaling content made from <br />
<a href="https://huggingface.co/spaces/fffiloni/zeroscope">the zeroscope_v2_576w space</a> using vid2vid. <br />
Remember to use the same prompt that was used to generate the original clip.<br />
For demo purpose, video length is limited to 3 seconds.
</p>
[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center)](https://huggingface.co/spaces/fffiloni/zeroscope-XL?duplicate=true)
"""

# Build the interface: inputs, submit button, output video and a share group
# that stays hidden until infer() returns an update making it visible.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(INTRO_MARKDOWN)

        # NOTE(review): the component is created with type="numpy", yet
        # infer() forwards its value as a video path — confirm this keyword
        # has any effect on gr.Video.
        video_in = gr.Video(type="numpy", source="upload")
        prompt_in = gr.Textbox(
            label="Prompt",
            placeholder="This must be the same prompt you used for the original clip :)",
            elem_id="prompt-in",
        )
        denoise_strength = gr.Slider(
            label="Denoise strength",
            minimum=0.6,
            maximum=0.9,
            step=0.01,
            value=0.66,
        )
        # Disabled control, kept for reference:
        #inference_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=1, value=40, interactive=False)

        submit_btn = gr.Button("Submit")
        video_result = gr.Video(
            label="Video Output",
            elem_id="video-output",
        )

        with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
            community_icon = gr.HTML(community_icon_html)
            loading_icon = gr.HTML(loading_icon_html)
            share_button = gr.Button("Share to community", elem_id="share-btn")

    # Event wiring: Submit runs infer(); the share button only triggers the
    # client-side script supplied by share_btn.
    submit_btn.click(
        fn=infer,
        inputs=[prompt_in, video_in, denoise_strength],
        outputs=[video_result, share_group],
    )
    share_button.click(None, [], [], _js=share_js)

# Serve the app with a request queue capped at 12 entries.
demo.queue(max_size=12).launch()