Spaces: Running on Zero

Cuda no ordinal

- app.py +11 -11
- models/salmonn.py +2 -2
app.py CHANGED

@@ -65,15 +65,15 @@ qwen_model.generation_config = GenerationConfig.from_pretrained(
 )
 
 
-
-
-
-
-
-
-
-
-
+salmonn_model = SALMONN(
+    ckpt="./SALMONN_PATHS/salmonn_v1.pth",
+    whisper_path="./SALMONN_PATHS/whisper-large-v2",
+    beats_path="./SALMONN_PATHS/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt",
+    vicuna_path="./SALMONN_PATHS/vicuna-13b-v1.1",
+    low_resource=False,
+    device="cuda",
+)
+salmonn_tokenizer = salmonn_model.llama_tokenizer
 
 
 diva = AutoModel.from_pretrained("WillHeld/DiVA-llama-3-v0-8b", trust_remote_code=True)

@@ -210,7 +210,7 @@ def transcribe(audio_input, text_prompt, state, model_order):
     initial_responses = [("", "", "")]
     resp_generators = [
         gen_from_via(),
-
+        gen_from_salmonn(),
         gen_from_qwen(),
     ]
     order = -1

@@ -250,7 +250,7 @@ def transcribe(audio_input, text_prompt, state, model_order):
         resp_2,
         resp_3,
         gr.Button(visible=True),
-        gr.Button(visible=
+        gr.Button(visible=True),
         gr.Button(visible=True),
         responses_complete(state),
     )
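The body of gen_from_salmonn() is outside this diff. A minimal sketch of what such a generator could look like, assuming it is a closure inside transcribe() where audio_input is a WAV file path, and that SALMONN.generate() (whose signature appears in the models/salmonn.py hunks below) returns a list of decoded strings; none of these names beyond salmonn_model, audio_input, and text_prompt are confirmed by the diff:

def gen_from_salmonn():
    # Hypothetical sketch, not the code from this Space: run SALMONN on the
    # recorded clip and yield the reply once, mirroring the other generators.
    out = salmonn_model.generate(
        audio_input,            # wav path captured by transcribe()
        prompt=text_prompt,
        device="cuda",          # matches the new no-ordinal default
        max_new_tokens=128,
    )
    # Assumed return shape: a list with one decoded string per input.
    yield out[0] if isinstance(out, (list, tuple)) else out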
models/salmonn.py CHANGED

@@ -44,7 +44,7 @@ class SALMONN(nn.Module):
         speech_qformer_token_num=1,
         speech_qformer_layer=2,
         lora=True,
-        device="cuda:0",
+        device="cuda",
         lora_alpha=32,
         lora_rank=8,
         lora_dropout=0.1,

@@ -138,7 +138,7 @@ class SALMONN(nn.Module):
         wav_path,
         prompt,
         prompt_pattern="USER: <Speech><SpeechHere></Speech> {}\nASSISTANT:",
-        device="cuda:0",
+        device="cuda",
         max_length=200,
         max_new_tokens=128,
         num_beams=1,
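The only change in this file is dropping the ordinal from the two device defaults, in line with the commit title. On a Space running on Zero the GPU is attached dynamically, so a hard-coded device index is the kind of thing that can trip a CUDA "invalid device ordinal" style error, while plain "cuda" simply refers to whatever the current device is. If the default ever needs to be resolved more defensively, a small helper along these lines (hypothetical, not part of this repo) keeps the ordinal out of the signature entirely:

import torch

def resolve_device(requested: str = "cuda") -> torch.device:
    # Plain "cuda" refers to the current CUDA device, whichever index the
    # runtime exposes, so it avoids pinning "cuda:0" in a default argument.
    if requested.startswith("cuda") and not torch.cuda.is_available():
        # Assumption: falling back to CPU is acceptable when no GPU is visible.
        return torch.device("cpu")
    return torch.device(requested)

# e.g. resolve_device() -> device(type='cuda') when a GPU is present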