import argparse
import cv2
import torch
import os
import shutil
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import transforms as T
from torchvision.transforms.functional import to_pil_image
from threading import Thread
from tqdm import tqdm
from PIL import Image
import gradio as gr
from dataset import VideoDataset, ZipDataset
from dataset import augmentation as A
from model import MattingBase, MattingRefine
from inference_utils import HomographicAlignment
# --------------- Utils ---------------
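# Both output sinks expose the same add_batch(frames) interface, so the
# conversion loop below can treat mp4 encoding and image-sequence dumping
# interchangeably. Frames arrive as float tensors in [0, 1], shaped NCHW.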
class VideoWriter:
    def __init__(self, path, frame_rate, width, height):
        self.out = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*'mp4v'), frame_rate, (width, height))

    def add_batch(self, frames):
        # Convert a float tensor batch in [0, 1] to uint8 HWC frames and encode them.
        frames = frames.mul(255).byte()
        frames = frames.cpu().permute(0, 2, 3, 1).numpy()
        for i in range(frames.shape[0]):
            frame = frames[i]
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            self.out.write(frame)

    def release(self):
        # Finalize the container; without this the mp4 may be truncated or unplayable.
        self.out.release()
class ImageSequenceWriter:
    def __init__(self, path, extension):
        self.path = path
        self.extension = extension
        self.index = 0
        os.makedirs(path, exist_ok=True)

    def add_batch(self, frames):
        # Save on a background thread so inference is not blocked by disk I/O.
        # The index is advanced synchronously, so filenames stay in frame order.
        Thread(target=self._add_batch, args=(frames, self.index)).start()
        self.index += frames.shape[0]

    def _add_batch(self, frames, index):
        frames = frames.cpu()
        for i in range(frames.shape[0]):
            frame = frames[i]
            frame = to_pil_image(frame)
            frame.save(os.path.join(self.path, str(index + i).zfill(5) + '.' + self.extension))
# --------------- Main ---------------
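# Pipeline: persist the uploaded bytes to disk, load a BackgroundMattingV2-style
# model (MattingBase or MattingRefine), pair every video frame with the single
# background photo, predict alpha (pha) and foreground (fgr), then composite
# onto a green background and write ./output/com.mp4.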
def video_matting(video_src_content, video_bgr_content):
    src_video_path = './source/src_video.mp4'
    bgr_image_path = './source/bgr_image.png'
    os.makedirs('./source', exist_ok=True)
    # Write the uploaded source video to disk
    with open(src_video_path, 'wb') as video_file:
        video_file.write(video_src_content)
    # Write the uploaded background image to disk
    with open(bgr_image_path, 'wb') as bgr_file:
        bgr_file.write(video_bgr_content)
    video_src = src_video_path
    video_bgr = bgr_image_path
    default_args = {
        'model_type': 'mattingrefine',
        'model_backbone': 'resnet50',
        'model_backbone_scale': 0.25,
        'model_refine_mode': 'sampling',
        'model_refine_sample_pixels': 80000,
        'model_checkpoint': './pytorch_resnet50.pth',
        'model_refine_threshold': 0.7,
        'model_refine_kernel_size': 3,
        'video_src': './source/src.mp4',
        'video_bgr': './source/bgr.png',
        'video_target_bgr': None,
        'video_resize': [1920, 1080],
        'device': 'cpu',  # default to CPU
        'preprocess_alignment': False,
        'output_dir': './output',
        'output_types': ['com'],
        'output_format': 'video'
    }
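    # Notes on the refine settings above (per the BackgroundMattingV2 design):
    # - model_backbone_scale 0.25 runs the base network at quarter resolution;
    #   the refinement stage then restores detail at full resolution.
    # - refine_mode 'sampling' refines a fixed budget of the 80,000 patches with
    #   the highest predicted error; 'model_refine_threshold' only applies in
    #   'thresholding' mode and is unused here.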
    args = argparse.Namespace(**default_args)
    device = torch.device(args.device)

    # Load model
    if args.model_type == 'mattingbase':
        model = MattingBase(args.model_backbone)
    if args.model_type == 'mattingrefine':
        model = MattingRefine(
            args.model_backbone,
            args.model_backbone_scale,
            args.model_refine_mode,
            args.model_refine_sample_pixels,
            args.model_refine_threshold,
            args.model_refine_kernel_size)
    model = model.to(device).eval()
    model.load_state_dict(torch.load(args.model_checkpoint, map_location=device), strict=False)

    # Load video and background
    vid = VideoDataset(video_src)
    bgr = [Image.open(video_bgr).convert('RGB')]
    dataset = ZipDataset([vid, bgr], transforms=A.PairCompose([
        A.PairApply(T.Resize(args.video_resize[::-1]) if args.video_resize else nn.Identity()),
        HomographicAlignment() if args.preprocess_alignment else A.PairApply(nn.Identity()),
        A.PairApply(T.ToTensor())
    ]))
    if args.video_target_bgr:
        dataset = ZipDataset([dataset, VideoDataset(args.video_target_bgr, transforms=T.ToTensor())])
    # Create a fresh output directory. The original script's interactive
    # override prompt is replaced with an unconditional recreate, since a
    # Space has no stdin to answer a prompt with.
    if os.path.exists(args.output_dir):
        shutil.rmtree(args.output_dir)
    os.makedirs(args.output_dir)
    # Prepare writers
    if args.output_format == 'video':
        h = args.video_resize[1] if args.video_resize is not None else vid.height
        w = args.video_resize[0] if args.video_resize is not None else vid.width
        if 'com' in args.output_types:
            com_writer = VideoWriter(os.path.join(args.output_dir, 'com.mp4'), vid.frame_rate, w, h)
        if 'pha' in args.output_types:
            pha_writer = VideoWriter(os.path.join(args.output_dir, 'pha.mp4'), vid.frame_rate, w, h)
        if 'fgr' in args.output_types:
            fgr_writer = VideoWriter(os.path.join(args.output_dir, 'fgr.mp4'), vid.frame_rate, w, h)
        if 'err' in args.output_types:
            err_writer = VideoWriter(os.path.join(args.output_dir, 'err.mp4'), vid.frame_rate, w, h)
        if 'ref' in args.output_types:
            ref_writer = VideoWriter(os.path.join(args.output_dir, 'ref.mp4'), vid.frame_rate, w, h)
    else:
        if 'com' in args.output_types:
            com_writer = ImageSequenceWriter(os.path.join(args.output_dir, 'com'), 'png')
        if 'pha' in args.output_types:
            pha_writer = ImageSequenceWriter(os.path.join(args.output_dir, 'pha'), 'jpg')
        if 'fgr' in args.output_types:
            fgr_writer = ImageSequenceWriter(os.path.join(args.output_dir, 'fgr'), 'jpg')
        if 'err' in args.output_types:
            err_writer = ImageSequenceWriter(os.path.join(args.output_dir, 'err'), 'jpg')
        if 'ref' in args.output_types:
            ref_writer = ImageSequenceWriter(os.path.join(args.output_dir, 'ref'), 'jpg')
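    # Output streams: 'com' = composite, 'pha' = alpha matte, 'fgr' = foreground,
    # 'err' = predicted error map, 'ref' = refinement-region visualization.
    # This Space only requests 'com' (see default_args), so the other writers
    # are never created.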
    # Conversion loop
    with torch.no_grad():
        for input_batch in tqdm(DataLoader(dataset, batch_size=1, pin_memory=True)):
            if args.video_target_bgr:
                (src, bgr), tgt_bgr = input_batch
                tgt_bgr = tgt_bgr.to(device, non_blocking=True)
            else:
                src, bgr = input_batch
                # Fixed green-screen color used when no target background video is given
                tgt_bgr = torch.tensor([120/255, 255/255, 155/255], device=device).view(1, 3, 1, 1)
            src = src.to(device, non_blocking=True)
            bgr = bgr.to(device, non_blocking=True)

            if args.model_type == 'mattingbase':
                pha, fgr, err, _ = model(src, bgr)
            elif args.model_type == 'mattingrefine':
                pha, fgr, _, _, err, ref = model(src, bgr)
            elif args.model_type == 'mattingbm':
                pha, fgr = model(src, bgr)

            if 'com' in args.output_types:
                if args.output_format == 'video':
                    # Output composite with the target (green) background
                    com = fgr * pha + tgt_bgr * (1 - pha)
                    com_writer.add_batch(com)
                else:
                    # Output composite as RGBA png images
                    com = torch.cat([fgr * pha.ne(0), pha], dim=1)
                    com_writer.add_batch(com)
            if 'pha' in args.output_types:
                pha_writer.add_batch(pha)
            if 'fgr' in args.output_types:
                fgr_writer.add_batch(fgr)
            if 'err' in args.output_types:
                err_writer.add_batch(F.interpolate(err, src.shape[2:], mode='bilinear', align_corners=False))
            if 'ref' in args.output_types:
                ref_writer.add_batch(F.interpolate(ref, src.shape[2:], mode='nearest'))

    # Release the video writers so the mp4 containers are finalized
    if args.output_format == 'video':
        if 'com' in args.output_types:
            com_writer.release()
        if 'pha' in args.output_types:
            pha_writer.release()
        if 'fgr' in args.output_types:
            fgr_writer.release()
        if 'err' in args.output_types:
            err_writer.release()
        if 'ref' in args.output_types:
            ref_writer.release()
    return './output/com.mp4'
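# Minimal direct-call sketch (outside Gradio), assuming the checkpoint file is
# present; the file names here are illustrative:
#
#   with open('clip.mp4', 'rb') as f_src, open('bg.png', 'rb') as f_bgr:
#       result = video_matting(f_src.read(), f_bgr.read())
#   print(result)  # -> './output/com.mp4'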
# Read a local video file's binary data
def get_video_content(video_path):
    with open(video_path, 'rb') as file:
        video_content = file.read()
    return video_content
# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(
        '''
This Space demonstrates how to perform video matting.
## How to use this Space?
- Upload a video, preferably with a duration of less than 10 seconds.
- Upload a photo of the video's background.
- You will receive the matting result after 5-10 minutes.
- Click the 'Clear' button to clear all the files.
## Examples
- You can get test examples from our [VideoMatting Dataset Repo.](https://huggingface.co/datasets/SJTU-TES/VideoMatting)
'''
    )
    with gr.Row():
        video_src = gr.File(label="Upload Source Video (.mp4)", type="binary", file_types=["mp4"])
        video_bgr = gr.File(label="Upload Background Image (.png)", type="binary", file_types=["png"])
    with gr.Row():
        output_video = gr.Video(label="Result Video")
    submit_button = gr.Button("Start Matting")
    # def download_video(video_path):
    #     if os.path.exists(video_path):
    #         with open(video_path, 'rb') as file:
    #             video_data = file.read()
    #         return video_data, "video/mp4", os.path.basename(video_path)
    #     else:
    #         return "Not Found", "text/plain", None

    def clear_outputs():
        # Returning None clears the Video component via the outputs binding below
        return None

    submit_button.click(
        fn=video_matting,
        inputs=[video_src, video_bgr],
        outputs=[output_video]
    )

    # download_button = gr.Button("Download")
    # download_button.click(
    #     download_video,
    #     inputs=[output_video],  # pass the video path from the video component
    #     outputs=[gr.File(label="Download")]
    # )

    clear_button = gr.Button("Clear")
    clear_button.click(fn=clear_outputs, inputs=[], outputs=[output_video])
if __name__ == "__main__":
    demo.launch()