File size: 2,628 Bytes
d5c679f 975cc83 d5c679f 975cc83 d5c679f 975cc83 d5c679f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
from __future__ import annotations
import argparse
import os
import tempfile
from typing import Callable
from gradio_client import Client
# import loguru
from groq import Groq
from loguru import logger
from hf import hf_transcript, get_whisper_hf_client
from logs import configure_logging
from rate_limit import rate_limit_bypass
# from remote_whisper import hf_transcribe_audio
from settings import app_settings
from transcribe import get_full_transcript, parse_audio
# NOTE(review): rate_limit_bypass presumably retries the call after sleeping
# `sleep_time` seconds on a rate-limit error -- confirm in the rate_limit module.
@rate_limit_bypass(sleep_time=20)
def summarize_groq(client: Groq, text: str) -> str:
    """Summarize one transcript chunk with a single Groq chat completion.

    The model name, system prompt, temperature and token limit are read from
    ``app_settings``. The user prompt (in Russian) asks for a short bulleted
    retelling of the chunk, with the block title written as ``**Title**``.

    Args:
        client: Groq API client used to send the request.
        text: Transcript chunk to summarize.

    Returns:
        The content of the first completion choice, or an empty string when
        the response carries no content.
    """
    # Every instruction ends with ". " so that adjacent literals do not run
    # together when Python concatenates them into one prompt string.
    user_prompt = (
        "Кратко перескажи видео по транскрипции, "
        "как будто это только часть видео. "
        "Используй оформление и ненумерованные пункты. "
        "Оформи название блока через **Название**. "
        "Не пиши о том, что это краткое изложение. "
        f"Вот транскрипция: {text}"
    )
    completion = client.chat.completions.create(
        model=app_settings.model,
        messages=[
            {
                "role": "system",
                "content": app_settings.system_prompt,
            },
            {
                "role": "user",
                "content": user_prompt,
            },
        ],
        temperature=app_settings.temperature,
        max_tokens=app_settings.max_tokens,
        top_p=1,
        stream=False,
        stop=None,
    )
    # `content` may be None; normalise to "" so callers can always treat the
    # result as a string (e.g. concatenate it).
    return completion.choices[0].message.content or ""
def summarize(
    texts: list[str],
    client: Client | Groq,
    summarizer: Callable[[Client | Groq, str], str] = summarize_groq,
) -> str:
    """Summarize every transcript chunk and concatenate the results.

    Args:
        texts: Transcript chunks, summarized in the given order.
        client: API client passed through unchanged to ``summarizer``.
        summarizer: Callable invoked as ``summarizer(client, chunk)`` for each
            chunk; defaults to ``summarize_groq``.

    Returns:
        All chunk summaries joined in order with no separator between them;
        an empty string when ``texts`` is empty.
    """
    logger.info("Summarizing transcript...")
    summaries = []
    # Chunk numbers in the log are 1-based.
    for number, chunk in enumerate(texts, start=1):
        logger.info(f"Summarizing chunk #{number}")
        summaries.append(summarizer(client, chunk))
    return "".join(summaries)
if __name__ == "__main__":
    # Script entry point: take a video path from the command line, transcribe
    # its audio track and print the summary of the transcript.
    configure_logging()

    cli = argparse.ArgumentParser("Video transcript summarizer")
    cli.add_argument("video_path", type=str, help="Path to video file")
    cli_args = cli.parse_args()

    groq_client = Groq(api_key=app_settings.groq_api_key)
    hf_client = get_whisper_hf_client()

    # All intermediate files live in a temporary directory that is removed
    # once the block exits.
    with tempfile.TemporaryDirectory() as workdir:
        audio_file = os.path.join(workdir, "audio.mp3")
        parse_audio(cli_args.video_path, audio_file)
        chunks = get_full_transcript(
            workdir,
            hf_client,
            one_file_transcript_func=hf_transcript,
        )
        # Kept inside the `with` block so the transcript is consumed while
        # the temporary files still exist.
        summary = summarize(chunks, groq_client)
        print(summary)
|