WillHeld committed
Commit db669a0
1 Parent(s): e9d8955

Cuda no ordinal
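
The commit drops the explicit device ordinal: every `device="cuda:0"` becomes `device="cuda"`, and the previously commented-out SALMONN setup in app.py is re-enabled. As a hedged aside (my illustration, not code from this commit): both strings are valid PyTorch device specifiers, but `"cuda"` refers to whichever CUDA device is current, while `"cuda:0"` hard-codes ordinal 0; dropping the ordinal is presumably what lets the Space come up cleanly in environments where the GPU is attached or remapped after import.

```python
import torch

# Sketch only: how the two device strings resolve in PyTorch.
generic = torch.device("cuda")   # the current CUDA device, no fixed ordinal
pinned = torch.device("cuda:0")  # always ordinal 0, even if devices are remapped

if torch.cuda.is_available():
    x = torch.zeros(2, 2, device=generic)
    print(x.device)  # prints the concrete device it landed on, e.g. cuda:0
```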

Files changed (2):
  1. app.py +11 -11
  2. models/salmonn.py +2 -2
app.py CHANGED
@@ -65,15 +65,15 @@ qwen_model.generation_config = GenerationConfig.from_pretrained(
 )
 
 
-# salmonn_model = SALMONN(
-#     ckpt="./SALMONN_PATHS/salmonn_v1.pth",
-#     whisper_path="./SALMONN_PATHS/whisper-large-v2",
-#     beats_path="./SALMONN_PATHS/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt",
-#     vicuna_path="./SALMONN_PATHS/vicuna-13b-v1.1",
-#     low_resource=False,
-#     device="cuda:0",
-# )
-# salmonn_tokenizer = salmonn_model.llama_tokenizer
+salmonn_model = SALMONN(
+    ckpt="./SALMONN_PATHS/salmonn_v1.pth",
+    whisper_path="./SALMONN_PATHS/whisper-large-v2",
+    beats_path="./SALMONN_PATHS/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt",
+    vicuna_path="./SALMONN_PATHS/vicuna-13b-v1.1",
+    low_resource=False,
+    device="cuda",
+)
+salmonn_tokenizer = salmonn_model.llama_tokenizer
 
 
 diva = AutoModel.from_pretrained("WillHeld/DiVA-llama-3-v0-8b", trust_remote_code=True)
@@ -210,7 +210,7 @@ def transcribe(audio_input, text_prompt, state, model_order):
     initial_responses = [("", "", "")]
     resp_generators = [
         gen_from_via(),
-        #gen_from_salmonn(),
+        gen_from_salmonn(),
         gen_from_qwen(),
     ]
     order = -1
@@ -250,7 +250,7 @@ def transcribe(audio_input, text_prompt, state, model_order):
         resp_2,
         resp_3,
         gr.Button(visible=True),
-        gr.Button(visible=False),
+        gr.Button(visible=True),
         responses_complete(state),
     )
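
In the last hunk, the second button in the return tuple flips from `visible=False` to `visible=True`, presumably because a SALMONN response is now streamed alongside the other two models and its vote button should be shown. For context, here is a minimal hypothetical sketch of that Gradio pattern (component names and handler are my own, not taken from app.py): returning a `gr.Button(...)` instance from an event handler updates the matching output component, which is how a hidden button gets revealed once responses are complete.

```python
import gradio as gr

# Hypothetical sketch of the reveal-on-completion pattern used above.
with gr.Blocks() as demo:
    response = gr.Textbox(label="Response")
    vote_btn = gr.Button("Vote", visible=False)  # hidden until a response exists
    run_btn = gr.Button("Run")

    def run():
        # Returning a component instance updates the matching output,
        # so the vote button becomes visible after the handler finishes.
        return "model output...", gr.Button(visible=True)

    run_btn.click(run, inputs=None, outputs=[response, vote_btn])

# demo.launch()  # uncomment to serve the sketch locally
```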
models/salmonn.py CHANGED
@@ -44,7 +44,7 @@ class SALMONN(nn.Module):
         speech_qformer_token_num=1,
         speech_qformer_layer=2,
         lora=True,
-        device="cuda:0",
+        device="cuda",
         lora_alpha=32,
         lora_rank=8,
         lora_dropout=0.1,
@@ -138,7 +138,7 @@ class SALMONN(nn.Module):
         wav_path,
         prompt,
         prompt_pattern="USER: <Speech><SpeechHere></Speech> {}\nASSISTANT:",
-        device="cuda:0",
+        device="cuda",
         max_length=200,
         max_new_tokens=128,
         num_beams=1,
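
Both defaults in models/salmonn.py now use the same ordinal-free `"cuda"` string, keeping the class consistent with app.py. A related defensive pattern, shown only as a sketch and not part of this commit (the SALMONN class here takes the string as-is), is to resolve the device at call time so the same default also works on machines without a GPU:

```python
import torch

def resolve_device(device: str = "cuda") -> torch.device:
    """Return a usable torch.device, falling back to CPU when CUDA is absent.

    Sketch only; not how this repository handles the argument.
    """
    if device.startswith("cuda") and not torch.cuda.is_available():
        return torch.device("cpu")
    return torch.device(device)

print(resolve_device())       # current CUDA device on GPU machines, else cpu
print(resolve_device("cpu"))  # cpu
```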