Spaces:

sitammeur
/

VidiQA

Running on Zero

App Files Files Community

sitammeur committed on Aug 19

Commit

0dc8cc8

•

1 Parent(s): 390ae4a

Upload 5 files

Browse files

Files changed (5) hide show

src/app/__init__.py +0 -0
src/app/model.py +45 -0
src/app/response.py +79 -0
src/utils/__init__.py +0 -0
src/utils/video_processing.py +65 -0

src/app/__init__.py ADDED Viewed

File without changes

src/app/model.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# Necessary imports
+import sys
+from typing import Any
+import torch
+from transformers import AutoModel, AutoTokenizer
+# Local imports
+from src.logger import logging
+from src.exception import CustomExceptionHandling
+def load_model_and_tokenizer(model_name: str, device: str) -> Any:
+    """
+    Load the model and tokenizer.
+    Args:
+        - model_name (str): The name of the model to load.
+        - device (str): The device to load the model onto.
+    Returns:
+        - model: The loaded model.
+        - tokenizer: The loaded tokenizer.
+    """
+    try:
+        # Load the model and tokenizer
+        model = AutoModel.from_pretrained(
+            model_name,
+            trust_remote_code=True,
+            attn_implementation="sdpa",
+            torch_dtype=torch.bfloat16,
+        )
+        model = model.to(device=device)
+        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        model.eval()
+        # Log the successful loading of the model and tokenizer
+        logging.info("Model and tokenizer loaded successfully.")
+        # Return the model and tokenizer
+        return model, tokenizer
+    # Handle exceptions that may occur during model and tokenizer loading
+    except Exception as e:
+        # Custom exception handling
+        raise CustomExceptionHandling(e, sys) from e

src/app/response.py ADDED Viewed

	@@ -0,0 +1,79 @@

+# Necessary imports
+import sys
+from typing import Any, Dict
+import spaces
+# Local imports
+from src.utils.video_processing import encode_video
+from src.config import (
+    device,
+    model_name,
+    system_prompt,
+    sampling,
+    stream,
+    top_p,
+    top_k,
+    temperature,
+    repetition_penalty,
+    max_new_tokens,
+)
+from src.app.model import load_model_and_tokenizer
+from src.logger import logging
+from src.exception import CustomExceptionHandling
+# Model and tokenizer: loaded once when this module is imported and shared by
+# every describe_video call
+model, tokenizer = load_model_and_tokenizer(model_name, device)
+@spaces.GPU()
+def describe_video(video: str, question: str) -> str:
+    """
+    Describes a video by generating an answer to a given question.
+    Args:
+        - video (str): The path to the video file.
+        - question (str): The question to be answered about the video.
+    Returns:
+        str: The generated answer to the question.
+    """
+    try:
+        # Encode the video frames
+        frames = encode_video(video)
+        # Message format for the model
+        msgs = [{"role": "user", "content": frames + [question]}]
+        # Set decode params for video
+        params: Dict[str, Any] = {
+            "use_image_id": False,
+            "max_slice_nums": 1,  # Use 1 if CUDA OOM and video resolution > 448*448
+        }
+        # Generate the answer
+        answer = model.chat(
+            image=None,
+            msgs=msgs,
+            tokenizer=tokenizer,
+            sampling=sampling,
+            stream=stream,
+            top_p=top_p,
+            top_k=top_k,
+            temperature=temperature,
+            repetition_penalty=repetition_penalty,
+            max_new_tokens=max_new_tokens,
+            system_prompt=system_prompt,
+            **params
+        )
+        # Log the successful generation of the answer
+        logging.info("Answer generated successfully.")
+        # Return the answer
+        return " ".join(answer)
+    # Handle exceptions that may occur during answer generation
+    except Exception as e:
+        # Custom exception handling
+        raise CustomExceptionHandling(e, sys) from e

src/utils/__init__.py ADDED Viewed

File without changes

src/utils/video_processing.py ADDED Viewed

	@@ -0,0 +1,65 @@

+# Necessary imports
+import sys
+from PIL import Image
+from decord import VideoReader, cpu
+from typing import List
+# Local imports
+from src.logger import logging
+from src.exception import CustomExceptionHandling
+# Constants
+# Upper bound on the number of frames encode_video keeps for one video
+MAX_NUM_FRAMES = 64  # If CUDA OOM, set a smaller number
+def encode_video(video_path: str) -> List[Image.Image]:
+    """
+    Encodes a video file into a list of frames.
+    Args:
+        video_path (str): The path to the video file.
+    Returns:
+        list: A list of frames, where each frame is represented as an Image object.
+    """
+    def uniform_sample(l: List, n: int) -> List:
+        """
+        Uniformly samples elements from a list.
+        Args:
+            - l (list): The input list.
+            - n (int): The number of elements to sample.
+        Returns:
+            list: A list of sampled elements.
+        """
+        gap = len(l) / n
+        idxs = [int(i * gap + gap / 2) for i in range(n)]
+        return [l[i] for i in idxs]
+    try:
+        # Read the video file and sample frames
+        vr = VideoReader(video_path, ctx=cpu(0))
+        sample_fps = round(vr.get_avg_fps() / 1)  # FPS
+        frame_idx = [i for i in range(0, len(vr), sample_fps)]
+        # Uniformly sample frames if the number of frames is too large
+        if len(frame_idx) > MAX_NUM_FRAMES:
+            frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
+        # Extract frames from the video
+        frames = vr.get_batch(frame_idx).asnumpy()
+        frames = [Image.fromarray(v.astype("uint8")) for v in frames]
+        # Log the successful encoding of the video
+        logging.info("Video encoded successfully.")
+        # Return video frames
+        return frames
+    # Handle exceptions that may occur during video encoding
+    except Exception as e:
+        # Custom exception handling
+        raise CustomExceptionHandling(e, sys) from e