Spaces:

Godota
/

speech-to-text-speaker

Running on Zero

Godota committed 2 days ago

Commit

fd22b43

•

1 Parent(s): 56e6fe2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,17 @@
 import logging
 logging.getLogger("httpx").setLevel(logging.WARNING)
 import gradio as gr
 from transformers import pipeline
 import torch
@@ -12,25 +23,14 @@ import torchaudio
 import numpy as np
 from sklearn.cluster import AgglomerativeClustering
 import tempfile
-import os
-import subprocess
-# Initialize Whisper with flash attention
-pipe = pipeline(
-    "automatic-speech-recognition",
-    model="openai/whisper-large-v3-turbo",
-    torch_dtype=torch.float16,
-    device="cuda:0",
-    model_kwargs={"attn_implementation": "flash_attention_2"},
-)
-# Initialize Whisper with flash attention
 pipe = pipeline(
     "automatic-speech-recognition",
     model="openai/whisper-large-v3-turbo",
     torch_dtype=torch.float16,
     device="cuda:0",
-    model_kwargs={"attn_implementation": "flash_attention_2"},
 )
 # Speaker model initialization

 import logging
 logging.getLogger("httpx").setLevel(logging.WARNING)
+import os
+os.environ["CUDA_HOME"] = "/usr/local/cuda"
+os.environ["FLASH_ATTENTION_SKIP_CUDA_BUILD"] = "TRUE"
+import subprocess
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True,
+)
 import gradio as gr
 from transformers import pipeline
 import torch
 import numpy as np
 from sklearn.cluster import AgglomerativeClustering
 import tempfile
+# Initialize Whisper
 pipe = pipeline(
     "automatic-speech-recognition",
     model="openai/whisper-large-v3-turbo",
     torch_dtype=torch.float16,
     device="cuda:0",
+    model_kwargs={"attn_implementation": "sdpa"}  # Changed to sdpa instead of flash_attention_2
 )
 # Speaker model initialization