Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,17 @@
|
|
1 |
import logging
|
2 |
logging.getLogger("httpx").setLevel(logging.WARNING)
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import gradio as gr
|
5 |
from transformers import pipeline
|
6 |
import torch
|
@@ -12,25 +23,14 @@ import torchaudio
|
|
12 |
import numpy as np
|
13 |
from sklearn.cluster import AgglomerativeClustering
|
14 |
import tempfile
|
15 |
-
import os
|
16 |
-
import subprocess
|
17 |
-
|
18 |
-
# Initialize Whisper with flash attention
|
19 |
-
pipe = pipeline(
|
20 |
-
"automatic-speech-recognition",
|
21 |
-
model="openai/whisper-large-v3-turbo",
|
22 |
-
torch_dtype=torch.float16,
|
23 |
-
device="cuda:0",
|
24 |
-
model_kwargs={"attn_implementation": "flash_attention_2"},
|
25 |
-
)
|
26 |
|
27 |
-
# Initialize Whisper
|
28 |
pipe = pipeline(
|
29 |
"automatic-speech-recognition",
|
30 |
model="openai/whisper-large-v3-turbo",
|
31 |
torch_dtype=torch.float16,
|
32 |
device="cuda:0",
|
33 |
-
model_kwargs={"attn_implementation": "
|
34 |
)
|
35 |
|
36 |
# Speaker model initialization
|
|
|
1 |
import logging
|
2 |
logging.getLogger("httpx").setLevel(logging.WARNING)
|
3 |
|
4 |
+
import os
|
5 |
+
os.environ["CUDA_HOME"] = "/usr/local/cuda"
|
6 |
+
os.environ["FLASH_ATTENTION_SKIP_CUDA_BUILD"] = "TRUE"
|
7 |
+
|
8 |
+
import subprocess
|
9 |
+
subprocess.run(
|
10 |
+
"pip install flash-attn --no-build-isolation",
|
11 |
+
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
12 |
+
shell=True,
|
13 |
+
)
|
14 |
+
|
15 |
import gradio as gr
|
16 |
from transformers import pipeline
|
17 |
import torch
|
|
|
23 |
import numpy as np
|
24 |
from sklearn.cluster import AgglomerativeClustering
|
25 |
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
+
# Initialize Whisper
|
28 |
pipe = pipeline(
|
29 |
"automatic-speech-recognition",
|
30 |
model="openai/whisper-large-v3-turbo",
|
31 |
torch_dtype=torch.float16,
|
32 |
device="cuda:0",
|
33 |
+
model_kwargs={"attn_implementation": "sdpa"} # Changed to sdpa instead of flash_attention_2
|
34 |
)
|
35 |
|
36 |
# Speaker model initialization
|