xsestech committed on
Commit
d5c679f
0 Parent(s):

Created app

Browse files
.gitignore ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### macOS template
2
+ # General
3
+ .DS_Store
4
+ .AppleDouble
5
+ .LSOverride
6
+
7
+ # Icon must end with two \r
8
+ Icon
9
+
10
+ # Thumbnails
11
+ ._*
12
+
13
+ # Files that might appear in the root of a volume
14
+ .DocumentRevisions-V100
15
+ .fseventsd
16
+ .Spotlight-V100
17
+ .TemporaryItems
18
+ .Trashes
19
+ .VolumeIcon.icns
20
+ .com.apple.timemachine.donotpresent
21
+
22
+ # Directories potentially created on remote AFP share
23
+ .AppleDB
24
+ .AppleDesktop
25
+ Network Trash Folder
26
+ Temporary Items
27
+ .apdisk
28
+
29
+ ### Python template
30
+ # Byte-compiled / optimized / DLL files
31
+ __pycache__/
32
+ *.py[cod]
33
+ *$py.class
34
+
35
+ # C extensions
36
+ *.so
37
+
38
+ # Distribution / packaging
39
+ .Python
40
+ build/
41
+ develop-eggs/
42
+ dist/
43
+ downloads/
44
+ eggs/
45
+ .eggs/
46
+ lib/
47
+ lib64/
48
+ parts/
49
+ sdist/
50
+ var/
51
+ wheels/
52
+ share/python-wheels/
53
+ *.egg-info/
54
+ .installed.cfg
55
+ *.egg
56
+ MANIFEST
57
+
58
+ # PyInstaller
59
+ # Usually these files are written by a python script from a template
60
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
61
+ *.manifest
62
+ *.spec
63
+
64
+ # Installer logs
65
+ pip-log.txt
66
+ pip-delete-this-directory.txt
67
+
68
+ # Unit test / coverage reports
69
+ htmlcov/
70
+ .tox/
71
+ .nox/
72
+ .coverage
73
+ .coverage.*
74
+ .cache
75
+ nosetests.xml
76
+ coverage.xml
77
+ *.cover
78
+ *.py,cover
79
+ .hypothesis/
80
+ .pytest_cache/
81
+ cover/
82
+
83
+ # Translations
84
+ *.mo
85
+ *.pot
86
+
87
+ # Django stuff:
88
+ *.log
89
+ local_settings.py
90
+ db.sqlite3
91
+ db.sqlite3-journal
92
+
93
+ # Flask stuff:
94
+ instance/
95
+ .webassets-cache
96
+
97
+ # Scrapy stuff:
98
+ .scrapy
99
+
100
+ # Sphinx documentation
101
+ docs/_build/
102
+
103
+ # PyBuilder
104
+ .pybuilder/
105
+ target/
106
+
107
+ # Jupyter Notebook
108
+ .ipynb_checkpoints
109
+
110
+ # IPython
111
+ profile_default/
112
+ ipython_config.py
113
+
114
+ # pyenv
115
+ # For a library or package, you might want to ignore these files since the code is
116
+ # intended to run in multiple environments; otherwise, check them in:
117
+ # .python-version
118
+
119
+ # pipenv
120
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
121
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
122
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
123
+ # install all needed dependencies.
124
+ #Pipfile.lock
125
+
126
+ # poetry
127
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
128
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
129
+ # commonly ignored for libraries.
130
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
131
+ #poetry.lock
132
+
133
+ # pdm
134
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
135
+ #pdm.lock
136
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
137
+ # in version control.
138
+ # https://pdm.fming.dev/#use-with-ide
139
+ .pdm.toml
140
+
141
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
142
+ __pypackages__/
143
+
144
+ # Celery stuff
145
+ celerybeat-schedule
146
+ celerybeat.pid
147
+
148
+ # SageMath parsed files
149
+ *.sage.py
150
+
151
+ # Environments
152
+ .env
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ #.idea/
190
+
191
+ ### PyCharm+all template
192
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
193
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
194
+
195
+ # User-specific stuff
196
+ .idea/**/workspace.xml
197
+ .idea/**/tasks.xml
198
+ .idea/**/usage.statistics.xml
199
+ .idea/**/dictionaries
200
+ .idea/**/shelf
201
+
202
+ # AWS User-specific
203
+ .idea/**/aws.xml
204
+
205
+ # Generated files
206
+ .idea/**/contentModel.xml
207
+
208
+ # Sensitive or high-churn files
209
+ .idea/**/dataSources/
210
+ .idea/**/dataSources.ids
211
+ .idea/**/dataSources.local.xml
212
+ .idea/**/sqlDataSources.xml
213
+ .idea/**/dynamic.xml
214
+ .idea/**/uiDesigner.xml
215
+ .idea/**/dbnavigator.xml
216
+
217
+ # Gradle
218
+ .idea/**/gradle.xml
219
+ .idea/**/libraries
220
+
221
+ # Gradle and Maven with auto-import
222
+ # When using Gradle or Maven with auto-import, you should exclude module files,
223
+ # since they will be recreated, and may cause churn. Uncomment if using
224
+ # auto-import.
225
+ # .idea/artifacts
226
+ # .idea/compiler.xml
227
+ # .idea/jarRepositories.xml
228
+ # .idea/modules.xml
229
+ # .idea/*.iml
230
+ # .idea/modules
231
+ # *.iml
232
+ # *.ipr
233
+
234
+ # CMake
235
+ cmake-build-*/
236
+
237
+ # Mongo Explorer plugin
238
+ .idea/**/mongoSettings.xml
239
+
240
+ # File-based project format
241
+ *.iws
242
+
243
+ # IntelliJ
244
+ out/
245
+
246
+ # mpeltonen/sbt-idea plugin
247
+ .idea_modules/
248
+
249
+ # JIRA plugin
250
+ atlassian-ide-plugin.xml
251
+
252
+ # Cursive Clojure plugin
253
+ .idea/replstate.xml
254
+
255
+ # SonarLint plugin
256
+ .idea/sonarlint/
257
+
258
+ # Crashlytics plugin (for Android Studio and IntelliJ)
259
+ com_crashlytics_export_strings.xml
260
+ crashlytics.properties
261
+ crashlytics-build.properties
262
+ fabric.properties
263
+
264
+ # Editor-based Rest Client
265
+ .idea/httpRequests
266
+
267
+ # Android studio 3.1+ serialized cache file
268
+ .idea/caches/build_file_checksums.ser
269
+
270
+ examples
271
+ flagged
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/VTT.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="$USER_HOME$/miniconda3/envs/vtt-remote" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
5
+ <inspection_tool class="PyCompatibilityInspection" enabled="true" level="WARNING" enabled_by_default="true">
6
+ <option name="ourVersions">
7
+ <value>
8
+ <list size="8">
9
+ <item index="0" class="java.lang.String" itemvalue="3.12" />
10
+ <item index="1" class="java.lang.String" itemvalue="3.6" />
11
+ <item index="2" class="java.lang.String" itemvalue="3.7" />
12
+ <item index="3" class="java.lang.String" itemvalue="3.8" />
13
+ <item index="4" class="java.lang.String" itemvalue="3.9" />
14
+ <item index="5" class="java.lang.String" itemvalue="3.10" />
15
+ <item index="6" class="java.lang.String" itemvalue="3.11" />
16
+ <item index="7" class="java.lang.String" itemvalue="3.13" />
17
+ </list>
18
+ </value>
19
+ </option>
20
+ </inspection_tool>
21
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false">
22
+ <option name="ignoredPackages">
23
+ <value>
24
+ <list size="6">
25
+ <item index="0" class="java.lang.String" itemvalue="dnspython" />
26
+ <item index="1" class="java.lang.String" itemvalue="pydantic" />
27
+ <item index="2" class="java.lang.String" itemvalue="alembic" />
28
+ <item index="3" class="java.lang.String" itemvalue="certifi" />
29
+ <item index="4" class="java.lang.String" itemvalue="pydantic-core" />
30
+ <item index="5" class="java.lang.String" itemvalue="click" />
31
+ </list>
32
+ </value>
33
+ </option>
34
+ </inspection_tool>
35
+ <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
36
+ <option name="ignoredErrors">
37
+ <list>
38
+ <option value="N803" />
39
+ <option value="N802" />
40
+ </list>
41
+ </option>
42
+ </inspection_tool>
43
+ <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
44
+ <option name="ignoredIdentifiers">
45
+ <list>
46
+ <option value="rabbit_backend.util.abc_registry.ABCRegistry.*" />
47
+ </list>
48
+ </option>
49
+ </inspection_tool>
50
+ </profile>
51
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.11 (2)" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/miniconda3/envs/vtt-remote" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/VTT.iml" filepath="$PROJECT_DIR$/.idea/VTT.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+
4
+ import gradio as gr
5
+ from groq import Groq
6
+
7
+ from hf import hf_transcript, get_whisper_hf_client
8
+ from logs import configure_logging
9
+ from remote_llm import summarize
10
+ from transcribe import parse_audio, get_full_transcript, groq_transcript
11
+
12
+
13
def gradio_pipeline(video: str, whisper_inference: str, groq_api_token: str) -> str:
    """Transcribe a video and return a summary of its transcript.

    Parameters
    ----------
    video : str
        Value produced by the ``gr.Video`` input; it is forwarded to
        ``parse_audio`` as the input file path.
    whisper_inference : str
        Transcription backend, either ``"hf"`` or ``"groq"``.
    groq_api_token : str
        API key used to construct the Groq client. The Groq client is always
        needed because ``summarize`` is called with it.

    Returns
    -------
    str
        The value returned by ``summarize`` for the transcript chunks.

    Raises
    ------
    ValueError
        If ``whisper_inference`` is not a supported backend.
    """
    # Fail fast: previously an unknown backend left `transcript` unbound and
    # surfaced as an UnboundLocalError only after the audio had been extracted.
    if whisper_inference not in ("hf", "groq"):
        raise ValueError(
            f"Unsupported whisper inference backend: {whisper_inference!r}"
        )

    groq_client = Groq(api_key=groq_api_token)
    with tempfile.TemporaryDirectory() as tmpdirname:
        parse_audio(video, os.path.join(tmpdirname, "audio.mp3"))
        if whisper_inference == "hf":
            # Build the Hugging Face client only when it is actually used.
            transcript = get_full_transcript(
                tmpdirname,
                get_whisper_hf_client(),
                one_file_transcript_func=hf_transcript,
            )
        else:
            transcript = get_full_transcript(
                tmpdirname,
                groq_client,
                one_file_transcript_func=groq_transcript,
            )
    return summarize(transcript, groq_client)
24
+
25
+
26
if __name__ == "__main__":
    configure_logging()

    # Input widgets, listed in the order gradio_pipeline takes its arguments.
    video_input = gr.Video()
    backend_input = gr.Radio(
        choices=["groq", "hf"],
        value="hf",
        label="Whisper inference",
    )
    api_key_input = gr.Text(
        max_lines=1,
        type="password",
        placeholder="Enter your groq API key",
        label="groq API key",
    )

    # The summary is rendered as Markdown with a copy button.
    summary_output = gr.Markdown(
        value="# Here will be the summary...",
        label="Summary",
        show_copy_button=True,
    )

    demo = gr.Interface(
        fn=gradio_pipeline,
        inputs=[video_input, backend_input, api_key_input],
        outputs=summary_output,
        allow_flagging="never",
    )
    demo.launch()
hf.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio_client import Client, handle_file
2
+ from loguru import logger
3
+
4
+ from rate_limit import rate_limit_bypass
5
+
6
+
7
def get_whisper_hf_client() -> Client:
    """Create a ``gradio_client.Client`` for the Whisper JAX Space.

    Returns
    -------
    Client
        Client bound to ``sanchit-gandhi/whisper-jax-spaces``.
    """
    return Client("sanchit-gandhi/whisper-jax-spaces")
10
+
11
+
12
@rate_limit_bypass(sleep_time=2)
def hf_transcript(client: Client, audio_path: str):
    """Transcribe one audio file through the ``/predict_1`` endpoint.

    Parameters
    ----------
    client : Client
        Gradio client used to issue the prediction request.
    audio_path : str
        Path of the audio file to upload.

    Returns
    -------
    The first element of the endpoint's two-element result (the text).
    """
    prediction = client.predict(
        inputs=handle_file(audio_path),
        task="transcribe",
        return_timestamps=False,
        api_name="/predict_1",
    )
    # The endpoint answers with a (text, runtime) pair; only the text is used.
    transcript_text, _runtime = prediction
    logger.info(transcript_text)
    return transcript_text
llm.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_cpp import Llama
2
+
3
+
4
def get_llm(model_path: str = "models/Meta-Llama-3.1-8B-Instruct-IQ4_XS.gguf") -> Llama:
    """Load a llama.cpp model.

    Parameters
    ----------
    model_path : str, optional
        Path to the model file handed to ``Llama``.

    Returns
    -------
    Llama
        The constructed model object.
    """
    llama_options = {
        "model_path": model_path,
        # NOTE(review): -1 presumably asks llama.cpp to offload every layer to
        # the GPU - confirm against the llama-cpp-python documentation.
        "n_gpu_layers": -1,
    }
    return Llama(**llama_options)
9
+
10
+
11
def summarize_transcript(llm: "Llama", transcript: str) -> str:
    """Summarize a video transcript with a local llama.cpp model.

    Parameters
    ----------
    llm : Llama
        Model object exposing ``create_chat_completion``.
    transcript : str
        Transcript text to embed in the user prompt.

    Returns
    -------
    str
        The text of the first choice's message in the completion.
    """
    completion = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": f"Summarize the following video transcript: {transcript}",
            }
        ]
    )
    # The function is declared to return ``str``; previously it handed back the
    # whole completion object instead of the generated message text.
    return completion["choices"][0]["message"]["content"]
local_transcript.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import faulthandler
3
+ import gc
4
+ import os
5
+ import tempfile
6
+
7
+ import torch
8
+ import whisperx
9
+
10
+ from whisperx.asr import FasterWhisperPipeline
11
+
12
+
13
+
14
def get_device():
    """Return ``"cuda"`` when torch reports a CUDA device, else ``"cpu"``."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"
18
+
19
+
20
def generate_subtitles_from_audio(
    audio_file_path: str,
    model: FasterWhisperPipeline,
    batch_size: int = 8,
):
    """Transcribe an audio file with an already loaded whisperx model.

    Parameters
    ----------
    audio_file_path : str
        Path of the audio file passed to ``whisperx.load_audio``.
    model : FasterWhisperPipeline
        Loaded model whose ``transcribe`` method is called.
    batch_size : int, optional
        Batch size forwarded to ``transcribe``.

    Returns
    -------
    The result of ``model.transcribe`` (language is fixed to ``"ru"``).
    """
    waveform = whisperx.load_audio(audio_file_path)
    return model.transcribe(waveform, batch_size=batch_size, language="ru")
28
+
29
+
30
def generate_subtitles_from_video(
    video_path: str,
    model_name: str = "base",
    batch_size: int = 8,
    compute_type: str = "int8",
):
    """Extract the audio track of a video and transcribe it with whisperx.

    Parameters
    ----------
    video_path : str
        Path of the video file.
    model_name : str, optional
        Whisper model name passed to ``whisperx.load_model``.
    batch_size : int, optional
        Batch size used for transcription.
    compute_type : str, optional
        Compute type passed to ``whisperx.load_model``.

    Returns
    -------
    The transcription result of ``generate_subtitles_from_audio``.
    """
    # parse_audio is defined in transcribe.py and was never imported by this
    # module, so calling this function used to raise NameError.
    from transcribe import parse_audio

    # mkstemp returns an open descriptor; close it right away so it does not
    # leak - only the path is needed, ffmpeg rewrites the file itself.
    fd, audio_file = tempfile.mkstemp()
    os.close(fd)

    model = None
    try:
        device = get_device()

        print("Loading model:")
        model = whisperx.load_model(model_name, device, compute_type=compute_type, language="ru")
        print("Parsing audio:")
        parse_audio(video_path, audio_file)
        print("Generating subtitles:")
        return generate_subtitles_from_audio(audio_file, model, batch_size=batch_size)
    finally:
        # Clean up even when loading or transcription fails.
        os.remove(audio_file)
        del model
        gc.collect()
52
+
53
+
54
def add_whisper_args(arg_parser: argparse.ArgumentParser):
    """Register the transcription command-line arguments on ``arg_parser``.

    Parameters
    ----------
    arg_parser : argparse.ArgumentParser
        Parser that receives the positional ``video`` argument and the
        ``--compute_type``, ``--whisper_model`` and ``--batch_size`` options.
    """
    arg_parser.add_argument("video", help="video file")

    # Optional flags, kept in their original registration order.
    option_specs = (
        (
            "--compute_type",
            {
                "help": "Base type for model",
                "default": "int8",
                "choices": ["int8", "float16", "float32"],
            },
        ),
        ("--whisper_model", {"help": "model to use", "default": "large-v2"}),
        (
            "--batch_size",
            {"help": "Batch size for inference", "default": 4, "type": int},
        ),
    )
    for flag, options in option_specs:
        arg_parser.add_argument(flag, **options)
60
+
61
+
62
if __name__ == "__main__":
    # Dump tracebacks on hard crashes before anything else runs.
    faulthandler.enable()

    parser = argparse.ArgumentParser(description="Get video subtitles from a video")
    add_whisper_args(parser)
    args = parser.parse_args()

    subtitles = generate_subtitles_from_video(
        args.video,
        args.whisper_model,
        args.batch_size,
        args.compute_type,
    )
    print(subtitles)
logs.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ from typing import Union
4
+
5
+ from loguru import logger
6
+
7
+ from settings import app_settings
8
+
9
+
10
class InterceptHandler(logging.Handler):
    """Standard-library logging handler that forwards records to loguru.

    Adapted from the handler shown in the loguru documentation:
    https://loguru.readthedocs.io/en/stable/overview.html#entirely-compatible-with-standard-logging
    """

    def emit(self, record: logging.LogRecord) -> None:  # pragma: no cover
        """Propagate a log record to loguru.

        Parameters
        ----------
        record
            record to log.
        """
        # Use the loguru level with the same name when it exists; otherwise
        # fall back to the numeric level of the record.
        try:
            level: Union[str, int] = logger.level(record.levelname).name
        except ValueError:
            level = record.levelno

        # Skip the frames that belong to the logging module so loguru reports
        # the original caller. Stop if the stack runs out (f_back is None)
        # instead of failing with AttributeError.
        frame, depth = logging.currentframe(), 2
        while frame is not None and frame.f_code.co_filename == logging.__file__:
            frame = frame.f_back
            depth += 1

        logger.opt(depth=depth, exception=record.exc_info).log(
            level,
            record.getMessage(),
        )
44
+
45
+
46
def configure_logging() -> None:  # pragma: no cover
    """Route standard-library logging through loguru and set the output sink.

    Installs ``InterceptHandler`` as the root handler, clears the handlers of
    the ``uvicorn.*`` loggers, attaches the intercept handler to ``uvicorn``
    and ``uvicorn.access``, then replaces loguru's sinks with a single stdout
    sink at the level configured in ``app_settings.log_level``.
    """
    intercept_handler = InterceptHandler()

    logging.basicConfig(handlers=[intercept_handler], level=logging.NOTSET)

    for logger_name in logging.root.manager.loggerDict:
        if logger_name.startswith("uvicorn."):
            logging.getLogger(logger_name).handlers = []

    # change handler for default uvicorn logger
    logging.getLogger("uvicorn").handlers = [intercept_handler]
    logging.getLogger("uvicorn.access").handlers = [intercept_handler]

    # The settings enum uses standard-library level names, including NOTSET
    # and FATAL, which loguru does not know by name. Translate the name to the
    # stdlib numeric severity, which loguru accepts as an integer level.
    numeric_level = getattr(logging, app_settings.log_level.value)

    # set logs output, level and format
    logger.remove()
    logger.add(
        sys.stdout,
        level=numeric_level,
    )
rate_limit.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import time
3
+
4
+ from groq import RateLimitError
5
+ from httpx import ReadTimeout
6
+ from loguru import logger
7
+
8
+
9
def rate_limit_bypass(sleep_time: int = 1, max_retries: int = 10):
    """Retry a call that fails with a rate-limit error or a read timeout.

    Parameters
    ----------
    sleep_time : int, optional, default 1
        Seconds to wait between attempts.
    max_retries : int, optional, default 10
        Number of retries after the first attempt; the wrapped function is
        called at most ``max_retries + 1`` times.

    Returns
    -------
    A decorator. The decorated function returns the wrapped function's result
    and re-raises the last ``RateLimitError`` / ``ReadTimeout`` once the retry
    budget is exhausted.
    """

    def decorate_rate_limit(func):
        @functools.wraps(func)
        def wrapper_rate_limit(*args, **kwargs):
            retries = 0
            while True:
                try:
                    return func(*args, **kwargs)
                except (RateLimitError, ReadTimeout) as e:
                    retries += 1
                    if retries > max_retries:
                        # Out of retries: re-raise immediately with the
                        # original traceback instead of sleeping once more.
                        logger.debug(repr(e))
                        raise
                    logger.info(f"Rate limit exceeded, sleeping for {sleep_time} seconds")
                    logger.debug(repr(e))
                    time.sleep(sleep_time)

        return wrapper_rate_limit

    return decorate_rate_limit
remote_llm.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import os
5
+ import tempfile
6
+ from typing import Callable
7
+
8
+ from gradio_client import Client
9
+ # import loguru
10
+ from groq import Groq
11
+ from loguru import logger
12
+
13
+ from hf import hf_transcript, get_whisper_hf_client
14
+ from logs import configure_logging
15
+ from rate_limit import rate_limit_bypass
16
+ # from remote_whisper import hf_transcribe_audio
17
+ from settings import app_settings
18
+ from transcribe import get_full_transcript, parse_audio
19
+
20
+ SEGMENT_TIME = 1500
21
+
22
+
23
@rate_limit_bypass(sleep_time=20)
def summarize_groq(client: Groq, text: str):
    """Summarize one transcript excerpt with a Groq chat completion.

    Parameters
    ----------
    client : Groq
        Groq client used to create the completion.
    text : str
        Transcript excerpt sent as the user message.

    Returns
    -------
    The message content of the first completion choice.
    """
    # The sentences are concatenated into one prompt; a space was missing
    # between "russian." and "Here is the transcript".
    system_prompt = (
        "Summarize the video transcript excerpt including"
        " a concise title that reflects the content. "
        "Wrap the title with **markdown bold notation**. "
        "Write the summary as if you are continuing a conversation without needing "
        "to signal a beginning. Answer only in russian. "
        "Here is the transcript: "
    )
    completion = client.chat.completions.create(
        model=app_settings.model,
        messages=[
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": text,
            },
        ],
        temperature=app_settings.temperature,
        max_tokens=1024,
        top_p=1,
        stream=False,
        stop=None,
    )
    return completion.choices[0].message.content
49
+
50
+
51
def summarize(
        texts: list[str],
        client: Client | Groq,
        summarizer: Callable[[Client | Groq, str], str] = summarize_groq,
) -> str:
    """Summarize every transcript chunk and combine the partial summaries.

    Parameters
    ----------
    texts : list[str]
        Transcript chunks, summarized in the given order.
    client : Client | Groq
        Client object passed through to ``summarizer``.
    summarizer : Callable[[Client | Groq, str], str], optional
        Function that summarizes a single chunk.

    Returns
    -------
    str
        The chunk summaries separated by blank lines, or an empty string when
        ``texts`` is empty.
    """
    logger.info("Summarizing transcript...")
    summaries = []
    for index, chunk in enumerate(texts, start=1):
        logger.info(f"Summarizing chunk #{index}")
        summaries.append(summarizer(client, chunk))

    # Separate the parts with a blank line so each chunk's bold Markdown title
    # starts its own paragraph instead of running into the previous summary.
    return "\n\n".join(summaries)
65
+
66
+
67
if __name__ == "__main__":
    configure_logging()

    # Command line: a single positional path to the video.
    parser = argparse.ArgumentParser("Video transcript summarizer")
    parser.add_argument("video_path", help="Path to video file", type=str)
    args = parser.parse_args()

    # Groq produces the summary; the Hugging Face client does the transcription.
    groq_client = Groq(api_key=app_settings.groq_api_key)
    hf_client = get_whisper_hf_client()

    with tempfile.TemporaryDirectory() as tmpdirname:
        audio_path = os.path.join(tmpdirname, "audio.mp3")
        parse_audio(args.video_path, audio_path)
        transcript = get_full_transcript(
            tmpdirname,
            hf_client,
            one_file_transcript_func=hf_transcript,
        )
        summary = summarize(transcript, groq_client)
        print(summary)
requirements.txt ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ ansible==8.0.0
4
+ ansible-core==2.15.0
5
+ anyio==4.4.0
6
+ argcomplete==3.1.1
7
+ certifi==2024.8.30
8
+ charset-normalizer==3.3.2
9
+ click==8.1.7
10
+ contourpy==1.3.0
11
+ cycler==0.12.1
12
+ distro==1.9.0
13
+ exceptiongroup==1.2.2
14
+ fastapi==0.112.4
15
+ ffmpy==0.4.0
16
+ filelock==3.16.0
17
+ fonttools==4.53.1
18
+ fsspec==2024.9.0
19
+ gradio==4.43.0
20
+ gradio_client==1.3.0
21
+ groq==0.11.0
22
+ h11==0.14.0
23
+ httpcore==1.0.5
24
+ httpx==0.27.2
25
+ huggingface-hub==0.24.6
26
+ idna==3.8
27
+ importlib_resources==6.4.4
28
+ Jinja2==3.1.4
29
+ kiwisolver==1.4.7
30
+ loguru==0.7.2
31
+ markdown-it-py==3.0.0
32
+ MarkupSafe==2.1.5
33
+ matplotlib==3.9.2
34
+ mdurl==0.1.2
35
+ numpy==2.1.1
36
+ orjson==3.10.7
37
+ packaging==24.1
38
+ pandas==2.2.2
39
+ pillow==10.4.0
40
+ pydantic==2.9.0
41
+ pydantic-settings==2.4.0
42
+ pydantic_core==2.23.2
43
+ pydub==0.25.1
44
+ Pygments==2.18.0
45
+ pyparsing==3.1.4
46
+ python-dateutil==2.9.0.post0
47
+ python-dotenv==1.0.1
48
+ python-multipart==0.0.9
49
+ pytz==2024.1
50
+ PyYAML==6.0
51
+ requests==2.32.3
52
+ resolvelib==1.0.1
53
+ rich==13.8.0
54
+ ruff==0.6.4
55
+ semantic-version==2.10.0
56
+ shellingham==1.5.4
57
+ six==1.16.0
58
+ sniffio==1.3.1
59
+ starlette==0.38.5
60
+ tomlkit==0.12.0
61
+ tqdm==4.66.5
62
+ typer==0.12.5
63
+ typing_extensions==4.12.2
64
+ tzdata==2024.1
65
+ urllib3==2.2.2
66
+ uvicorn==0.30.6
67
+ websockets==12.0
settings.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import enum
2
+
3
+ from pydantic_settings import BaseSettings
4
+
5
+
6
class LogLevel(str, enum.Enum):
    """Log level names that can be selected in the application settings.

    Members are strings, so each one compares equal to its own name.
    """

    # Listed from least to most severe.
    NOTSET = "NOTSET"
    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    FATAL = "FATAL"
15
+
16
+
17
class Settings(BaseSettings):
    """Application settings, loaded by pydantic-settings."""

    # Sampling temperature passed to the chat completion request.
    temperature: float = 1
    # Model name passed to the chat completion request.
    model: str = "llama3-8b-8192"
    # Level of the loguru stdout sink.
    log_level: LogLevel = LogLevel.INFO
    # Segment length in seconds used when splitting audio with ffmpeg.
    segment_time: int = 1500
    # Groq API key read by the remote_llm command-line entry point. The field
    # was missing, so accessing it raised AttributeError. It defaults to an
    # empty string so the settings still load when no key is configured
    # (the Gradio app takes the key from the UI instead).
    groq_api_key: str = ""
22
+
23
+
24
+ app_settings = Settings()
transcribe.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import time
5
+ from typing import Callable
6
+
7
+ from gradio_client import Client
8
+ from groq import Groq
9
+ from loguru import logger
10
+
11
+ from rate_limit import rate_limit_bypass
12
+ from settings import app_settings
13
+
14
+
15
def parse_audio(input_file_path: str, output_file_path: str) -> None:
    """Extract the audio of a video file into an mp3 file using ffmpeg.

    Parameters
    ----------
    input_file_path : str
        Path of the source video file.
    output_file_path : str
        Path of the mp3 file to write; an existing file is overwritten (``-y``).

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status.
    """
    import subprocess

    # Pass an argument list instead of a shell string: paths containing spaces
    # or shell metacharacters are handed to ffmpeg verbatim, and a failed
    # conversion is reported instead of being silently ignored.
    subprocess.run(
        ["ffmpeg", "-y", "-i", input_file_path, "-f", "mp3", output_file_path],
        check=True,
    )
18
+
19
+
20
def split_audio_file(audio_folder_path: str, segment_time: int = app_settings.segment_time) -> None:
    """Split ``audio.mp3`` in a folder into numbered segments using ffmpeg.

    Parameters
    ----------
    audio_folder_path : str
        Folder that contains ``audio.mp3``; the segments are written next to
        it as ``audio_000.mp3``, ``audio_001.mp3``, ...
    segment_time : int, optional
        Time in seconds for each segment.

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status.
    """
    import subprocess

    audio_file_path = os.path.join(audio_folder_path, "audio.mp3")
    output_file_template = os.path.join(audio_folder_path, "audio_%03d.mp3")
    # Argument list instead of a shell string, so folder names with spaces or
    # shell metacharacters work, and ffmpeg failures are not ignored.
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", audio_file_path,
            "-f", "segment",
            "-segment_time", str(segment_time),
            "-c", "copy",
            output_file_template,
        ],
        check=True,
    )
34
+
35
+
36
@rate_limit_bypass(sleep_time=10)
def groq_transcript(client: Groq, audio_file_path: str) -> str:
    """Transcribe a single audio file with Groq's ``whisper-large-v3`` model.

    Parameters
    ----------
    client : Groq
        Groq client used to create the transcription.
    audio_file_path : str
        The path to the audio file to transcribe.

    Returns
    -------
    str
        The ``text`` attribute of the transcription response.
    """
    with open(audio_file_path, "rb") as audio_stream:
        audio_bytes = audio_stream.read()

    # The file is sent as a (name, content) pair.
    transcription = client.audio.transcriptions.create(
        file=(audio_file_path, audio_bytes),
        model="whisper-large-v3",
    )
    transcript_text = transcription.text
    logger.debug(f"Transcription: {transcript_text}")
    return transcript_text
52
+
53
+
54
def get_full_transcript(
        audio_folder_path: str,
        client: Client | Groq,
        one_file_transcript_func: Callable[[Client | Groq, str], str] = groq_transcript,
) -> list[str]:
    """Split the audio in a folder and transcribe every segment in order.

    Parameters
    ----------
    audio_folder_path : str
        Folder that contains ``audio.mp3``; it is split in place into
        ``audio_*`` segment files.
    client : Client | Groq
        A client object to pass to the transcript function.
    one_file_transcript_func : Callable[[Client | Groq, str], str], optional
        Function that transcribes a single audio file.

    Returns
    -------
    list[str]
        One transcript per segment, in playback order.
    """
    logger.info("Getting transcript...")
    split_audio_file(audio_folder_path)

    # os.listdir returns entries in arbitrary order. The segment names carry a
    # zero-padded index, so sorting them restores the playback order.
    segment_names = sorted(
        name for name in os.listdir(audio_folder_path) if name.startswith("audio_")
    )

    transcript = []
    for file_name in segment_names:
        audio_file_path = os.path.join(audio_folder_path, file_name)
        transcript.append(one_file_transcript_func(client, audio_file_path))
        # Pause between requests, as the original implementation did.
        time.sleep(2)
    return transcript