Spaces:

ardha27
/

VideoAnalyzer

Runtime error

App Files Files Community

Zeph27 committed on Aug 19

Commit

e95a3a8

•

0 Parent(s):

init

Browse files

Files changed (4) hide show

.gitignore +1 -0
app.py +78 -0
requirements.txt +7 -0
tiktok.py +26 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv/

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import gradio as gr
+from transformers import AutoModel, AutoTokenizer
+import torch
+from decord import VideoReader, cpu
+import os
+import spaces
+# Load the 4-bit MiniCPM-V 2.6 checkpoint and its tokenizer once, at import time; analyze_video() reuses these globals
+model_name = "openbmb/MiniCPM-V-2_6-int4"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)  # trust_remote_code lets transformers run code shipped in the model repo
+model = AutoModel.from_pretrained(model_name, trust_remote_code=True, device_map="auto")
+model.eval()  # evaluation mode: this app only runs inference
+MAX_NUM_FRAMES = 64  # upper bound on the number of frames encode_video() returns
+VIDEO_EXTENSIONS = {'.mp4', '.mkv', '.mov', '.avi', '.flv', '.wmv', '.webm', '.m4v'}  # lower-case suffixes accepted by is_video()
+def get_file_extension(filename):
+    return os.path.splitext(filename)[1].lower()
+def is_video(filename):
+    return get_file_extension(filename) in VIDEO_EXTENSIONS
+def encode_video(video):
+    def uniform_sample(l, n):
+        gap = len(l) / n
+        idxs = [int(i * gap + gap / 2) for i in range(n)]
+        return [l[i] for i in idxs]
+    if hasattr(video, 'path'):
+        video_path = video.path
+    else:
+        video_path = video.file.path
+    vr = VideoReader(video_path, ctx=cpu(0))
+    total_frames = len(vr)
+    if total_frames <= MAX_NUM_FRAMES:
+        frame_idxs = list(range(total_frames))
+    else:
+        frame_idxs = uniform_sample(range(total_frames), MAX_NUM_FRAMES)
+    frames = vr.get_batch(frame_idxs).asnumpy()
+    return frames
+@spaces.GPU
+def analyze_video(video, prompt):
+    if not is_video(video.name):
+        return "Please upload a valid video file."
+    frames = encode_video(video)
+    # Prepare the frames for the model
+    inputs = model.vpm(frames)
+    # Generate the caption with the user's prompt
+    with torch.no_grad():
+        outputs = model.generate(inputs=inputs, tokenizer=tokenizer, max_new_tokens=50, prompt=prompt)
+    # Decode the output
+    caption = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return caption
+# Build the UI with gr.Blocks: header text, one row holding the video/prompt/result widgets, and an analyze button
+with gr.Blocks(title="Video Analyzer using MiniCPM-V-2.6-int4") as iface:
+    gr.Markdown("# Video Analyzer using MiniCPM-V-2.6-int4")
+    gr.Markdown("Upload a video to get an analysis using the MiniCPM-V-2.6-int4 model.")
+    gr.Markdown("This model uses 4-bit quantization for improved efficiency. [Learn more](https://huggingface.co/openbmb/MiniCPM-V-2_6-int4)")
+    with gr.Row():  # the three widgets below are declared inside a single row
+        video_input = gr.Video()
+        prompt_input = gr.Textbox(label="Prompt (optional)", placeholder="Enter a prompt to guide the analysis...")
+        analysis_output = gr.Textbox(label="Video Analysis")
+    analyze_button = gr.Button("Analyze Video")
+    analyze_button.click(fn=analyze_video, inputs=[video_input, prompt_input], outputs=analysis_output)  # click -> analyze_video(video, prompt) -> result textbox
+# Launch the interface (runs whenever this module executes; there is no __main__ guard)
+iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+Pillow==10.1.0
+torch==2.1.2
+torchvision==0.16.2
+transformers==4.40.0
+sentencepiece==0.1.99
+accelerate==0.30.1
+bitsandbytes==0.43.1
+# decord provides VideoReader, which app.py imports for frame extraction
+decord

tiktok.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import asyncio
+from douyin_tiktok_scraper.scraper import Scraper
+import traceback
+api = Scraper()  # module-level scraper instance used by hybrid_parsing()
+async def hybrid_parsing(url: str) -> dict:
+    try:
+        result = await api.hybrid_parsing(url)
+        print(f"The hybrid parsing result:\n {result}")
+        return result
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+        print("Traceback:")
+        traceback.print_exc()
+        return None
+async def main():
+    url = input("Paste Douyin/TikTok/Bilibili share URL here: ")
+    result = await hybrid_parsing(url)
+    if result:
+        print("Parsing successful!")
+    else:
+        print("Parsing failed.")
+asyncio.run(main())