sitammeur committed on
Commit
0dc8cc8
1 Parent(s): 390ae4a

Upload 5 files

Browse files
src/app/__init__.py ADDED
File without changes
src/app/model.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Necessary imports
2
+ import sys
3
+ from typing import Any
4
+ import torch
5
+ from transformers import AutoModel, AutoTokenizer
6
+
7
+ # Local imports
8
+ from src.logger import logging
9
+ from src.exception import CustomExceptionHandling
10
+
11
+
12
def load_model_and_tokenizer(model_name: str, device: str) -> Any:
    """
    Build an inference-ready model together with its matching tokenizer.

    Args:
        - model_name (str): Identifier handed to `from_pretrained` for both
          the model and the tokenizer.
        - device (str): The device the model is moved onto.

    Returns:
        - model: The loaded model, moved to `device` and put in eval mode.
        - tokenizer: The tokenizer loaded for the same `model_name`.

    Raises:
        CustomExceptionHandling: Wraps any exception raised while loading.
    """
    try:
        # Options for the model weights: bfloat16 precision with SDPA attention.
        # NOTE(review): trust_remote_code=True lets the model repository supply
        # its own Python code -- only point this at repositories you trust.
        model_options = {
            "trust_remote_code": True,
            "attn_implementation": "sdpa",
            "torch_dtype": torch.bfloat16,
        }
        model = AutoModel.from_pretrained(model_name, **model_options).to(
            device=device
        )

        # The tokenizer comes from the same repository as the model
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

        # Switch the model to evaluation mode
        model.eval()

        # Record that both artefacts are ready
        logging.info("Model and tokenizer loaded successfully.")

    # Any failure above is re-raised through the project's exception wrapper
    except Exception as e:
        raise CustomExceptionHandling(e, sys) from e

    # Hand both objects back to the caller as a pair
    return model, tokenizer
src/app/response.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Necessary imports
2
+ import sys
3
+ from typing import Any, Dict
4
+ import spaces
5
+
6
+ # Local imports
7
+ from src.utils.video_processing import encode_video
8
+ from src.config import (
9
+ device,
10
+ model_name,
11
+ system_prompt,
12
+ sampling,
13
+ stream,
14
+ top_p,
15
+ top_k,
16
+ temperature,
17
+ repetition_penalty,
18
+ max_new_tokens,
19
+ )
20
+ from src.app.model import load_model_and_tokenizer
21
+ from src.logger import logging
22
+ from src.exception import CustomExceptionHandling
23
+
24
+
25
# Model and tokenizer (loaded once when this module is imported and reused
# by every call to describe_video)
model, tokenizer = load_model_and_tokenizer(model_name, device)


@spaces.GPU()
def describe_video(video: str, question: str) -> str:
    """
    Describes a video by generating an answer to a given question.

    The video is turned into a list of frames, the question is appended to
    those frames as a single user message, and the module-level model is
    asked to answer using the generation settings from `src.config`.

    Args:
        - video (str): The path to the video file.
        - question (str): The question to be answered about the video.

    Returns:
        str: The generated answer to the question.

    Raises:
        CustomExceptionHandling: Wraps any exception raised while encoding
            the video or generating the answer.
    """
    try:
        # Encode the video frames
        frames = encode_video(video)

        # Message format for the model: all frames followed by the question
        msgs = [{"role": "user", "content": frames + [question]}]

        # Set decode params for video
        params: Dict[str, Any] = {
            "use_image_id": False,
            "max_slice_nums": 1,  # Use 1 if CUDA OOM and video resolution > 448*448
        }

        # Generate the answer
        answer = model.chat(
            image=None,
            msgs=msgs,
            tokenizer=tokenizer,
            sampling=sampling,
            stream=stream,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_new_tokens=max_new_tokens,
            system_prompt=system_prompt,
            **params,
        )

        # The reply is either a finished string or an iterable of text chunks
        # (presumably the latter when `stream` is enabled -- confirm against
        # the model's chat API). Chunks already carry their own whitespace,
        # so they are concatenated without a separator; joining with " "
        # would add stray spaces between chunks, or between every character
        # of a plain string.
        if not isinstance(answer, str):
            answer = "".join(answer)

        # Log the successful generation of the answer
        logging.info("Answer generated successfully.")

        # Return the answer
        return answer

    # Handle exceptions that may occur during answer generation
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e
src/utils/__init__.py ADDED
File without changes
src/utils/video_processing.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Necessary imports
2
+ import sys
3
+ from PIL import Image
4
+ from decord import VideoReader, cpu
5
+ from typing import List
6
+
7
+ # Local imports
8
+ from src.logger import logging
9
+ from src.exception import CustomExceptionHandling
10
+
11
+
12
# Constants
MAX_NUM_FRAMES = 64  # If CUDA OOM, set a smaller number


def encode_video(video_path: str) -> List[Image.Image]:
    """
    Encodes a video file into a list of frames.

    Frames are taken roughly one per second of video (the step between
    sampled frame indices is the rounded average FPS). If that yields more
    than MAX_NUM_FRAMES indices, they are thinned out evenly to exactly
    MAX_NUM_FRAMES.

    Args:
        video_path (str): The path to the video file.

    Returns:
        list: A list of frames, where each frame is represented as an Image object.

    Raises:
        CustomExceptionHandling: Wraps any exception raised while reading or
            decoding the video.
    """

    def uniform_sample(items: List, n: int) -> List:
        """
        Uniformly samples elements from a list.

        The list is split into `n` equal-width spans and the element at the
        middle of each span is picked.

        Args:
            - items (list): The input list.
            - n (int): The number of elements to sample.

        Returns:
            list: A list of sampled elements.
        """
        gap = len(items) / n
        return [items[int(i * gap + gap / 2)] for i in range(n)]

    try:
        # Read the video file and sample frames
        vr = VideoReader(video_path, ctx=cpu(0))

        # Step between sampled frames. Clamp to at least 1: an average FPS
        # below 0.5 rounds to 0, and range() raises ValueError on a zero step.
        sample_fps = max(1, round(vr.get_avg_fps()))
        frame_idx = list(range(0, len(vr), sample_fps))

        # Uniformly sample frames if the number of frames is too large
        if len(frame_idx) > MAX_NUM_FRAMES:
            frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)

        # Extract frames from the video and convert each one to a PIL image
        frames = vr.get_batch(frame_idx).asnumpy()
        frames = [Image.fromarray(v.astype("uint8")) for v in frames]

        # Log the successful encoding of the video
        logging.info("Video encoded successfully.")

        # Return video frames
        return frames

    # Handle exceptions that may occur during video encoding
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e