from llama_cpp import Llama


def get_llm(model_path: str = "models/Meta-Llama-3.1-8B-Instruct-IQ4_XS.gguf") -> Llama:
    return Llama(
        model_path=model_path,
        n_gpu_layers=-1,  # offload all layers to the GPU
        n_ctx=8192,  # the library's default context (512 tokens) is too small for most transcripts
    )


def summarize_transcript(llm: Llama, transcript: str) -> str:
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": f"Summarize the following video transcript: {transcript}",
            }
        ]
    )
    # create_chat_completion returns a response dict, not a string;
    # extract the assistant's message to match the declared -> str return type.
    return response["choices"][0]["message"]["content"]
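
# A minimal usage sketch (assumed, not part of the original snippet): load the
# model once, then pass a transcript string to summarize_transcript. The
# transcript text here is a placeholder.
if __name__ == "__main__":
    llm = get_llm()
    transcript = "Welcome back to the channel. In today's video we'll look at..."
    summary = summarize_transcript(llm, transcript)
    print(summary)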