from typing import Any, Dict, List
import logging
import os
import sys

sys.path.append('./')

from videollama2 import model_init, mm_infer
from videollama2.utils import disable_torch_init


class EndpointHandler:
    def __init__(self, path: str = ""):
        """
        Initialize the handler by loading the model and any other necessary components.

        Args:
            path (str): The path to the model or other necessary files.
                (Currently unused: the model is always loaded from the Hugging Face Hub.)
        """
        disable_torch_init()
        self.model_path = 'Aliayub1995/VideoLLaMA2-7B'
        self.model, self.processor, self.tokenizer = model_init(self.model_path)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        logging.info(f"Received data: {data}")

        # Debugging: log the working directory and the contents of ./app.
        logging.info(f"Current path: {os.getcwd()}")
        for dirpath, dirnames, filenames in os.walk("./app"):
            logging.info(f"Directory: {dirpath}")
            logging.info(f"Subdirectories: {dirnames}")
            logging.info(f"Files: {filenames}")
            logging.info("-" * 40)

        # Extract input data: accept either a wrapped {"inputs": {...}} payload
        # or the bare fields at the top level.
        inputs = data.get("inputs", data)
        modal = inputs.get("modal", "video")
        modal_path = inputs.get("modal_path", "")
        instruct = inputs.get("instruct", "")
        logging.info(f"Modal: {modal}, Modal Path: {modal_path}, Instruct: {instruct}")

        if not modal_path or not instruct:
            raise ValueError("Both 'modal_path' and 'instruct' must be provided in the input data.")

        # Perform inference: preprocess the video/image for the given modality,
        # then run the model with greedy decoding.
        output = mm_infer(
            self.processor[modal](modal_path),
            instruct,
            model=self.model,
            tokenizer=self.tokenizer,
            do_sample=False,
            modal=modal,
        )
        return [{"output": output}]
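

# --- Local smoke test: a minimal sketch of how the handler is invoked. ---
# The payload shape mirrors what the __call__ method above expects; the video
# path and instruction below are hypothetical placeholders, not files shipped
# with this repository. Running this requires the VideoLLaMA2 dependencies
# and enough GPU memory to load the 7B model.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    handler = EndpointHandler()
    payload = {
        "inputs": {
            "modal": "video",
            "modal_path": "assets/sample_video.mp4",  # hypothetical example file
            "instruct": "Describe what happens in this video.",
        }
    }
    # Returns a list with a single {"output": ...} dict.
    print(handler(payload))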