Aliayub1995
/

VideoLLaMA2-7B

Visual Question Answering

videollama2_mistral

text-generation

multimodal large language model

large video-language model

Inference Endpoints

Model card Files Files and versions Community

VideoLLaMA2-7B / handler.py

Aliayub1995's picture

Update handler.py

4d795e7 verified 3 months ago

2.11 kB

	from typing import Dict, List, Any
	import sys
	sys.path.append('./')
	from videollama2 import model_init, mm_infer
	from videollama2.utils import disable_torch_init
	import logging
	import os

	class EndpointHandler:
	    """Custom endpoint handler built on ``model_init`` / ``mm_infer``.

	    The model, the processors (looked up by ``modal``) and the tokenizer are
	    loaded once at construction time; every call then runs one inference for
	    one request payload.
	    """

	    # Directory whose contents are listed in the per-request diagnostics.
	    _DEBUG_ROOT = "./app"

	    def __init__(self, path: str = ""):
	        """Load the model, processors and tokenizer.

	        Args:
	            path: Accepted so the constructor matches the handler interface,
	                but not used: the model is always loaded from the fixed
	                identifier stored in ``self.model_path``.
	        """
	        disable_torch_init()
	        self.model_path = 'Aliayub1995/VideoLLaMA2-7B'
	        self.model, self.processor, self.tokenizer = model_init(self.model_path)

	    @staticmethod
	    def _log_directory_tree(root: str) -> None:
	        """Log the working directory and every directory/file found under *root*."""
	        logging.info("Current Path: %s", os.getcwd())
	        for dirpath, dirnames, filenames in os.walk(root):
	            logging.info("Current Path: %s", dirpath)
	            logging.info("Directories: %s", dirnames)
	            logging.info("Files: %s", filenames)
	            logging.info("-" * 40)

	    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
	        """Run one inference for a request payload.

	        Args:
	            data: Request payload. The fields are read from ``data["inputs"]``
	                when that key exists, otherwise from ``data`` itself:
	                ``modal`` (defaults to ``"video"``), ``modal_path`` and
	                ``instruct`` (both required and non-empty).

	        Returns:
	            A single-element list ``[{"output": <result of mm_infer>}]``.

	        Raises:
	            ValueError: If the payload is not a dict, if ``modal_path`` or
	                ``instruct`` is missing/empty, or if no processor is
	                registered for ``modal``.
	        """
	        logging.info("Received data: %s", data)

	        # The directory listing is pure diagnostics; skip the filesystem walk
	        # entirely when INFO records would be discarded anyway.
	        if logging.getLogger().isEnabledFor(logging.INFO):
	            self._log_directory_tree(self._DEBUG_ROOT)

	        # Accept both {"inputs": {...}} and a bare {...} payload.
	        payload = data.get("inputs", data) if isinstance(data, dict) else data
	        if not isinstance(payload, dict):
	            raise ValueError(
	                "Request 'inputs' must be an object containing 'modal_path' and 'instruct'."
	            )

	        modal = payload.get("modal", "video")
	        modal_path = payload.get("modal_path", "")
	        instruct = payload.get("instruct", "")
	        logging.info(
	            "Modal: %s, Modal Path: %s, Instruct: %s", modal, modal_path, instruct
	        )

	        if not modal_path or not instruct:
	            raise ValueError("Both 'modal_path' and 'instruct' must be provided in the input data.")

	        # Turn an unknown (or unhashable) modal into a clear validation error
	        # instead of letting a bare KeyError/TypeError escape from the lookup.
	        try:
	            preprocess = self.processor[modal]
	        except (KeyError, TypeError) as err:
	            raise ValueError(f"Unsupported modal: {modal!r}") from err

	        output = mm_infer(
	            preprocess(modal_path),
	            instruct,
	            model=self.model,
	            tokenizer=self.tokenizer,
	            do_sample=False,
	            modal=modal,
	        )

	        return [{"output": output}]