Kevin676 committed on
Commit
bd1e7ad
1 Parent(s): 2a86421

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -4
app.py CHANGED
@@ -3,6 +3,9 @@ from peft import PeftModel
3
  import transformers
4
  import gradio as gr
5
 
 
 
 
6
  assert (
7
  "LlamaTokenizer" in transformers._import_structure["models.llama"]
8
  ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
@@ -84,7 +87,8 @@ if torch.__version__ >= "2":
84
 
85
 
86
  def evaluate(
87
- instruction,
 
88
  input=None,
89
  temperature=0.1,
90
  top_p=0.75,
@@ -93,6 +97,24 @@ def evaluate(
93
  max_new_tokens=128,
94
  **kwargs,
95
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  prompt = generate_prompt(instruction, input)
97
  inputs = tokenizer(prompt, return_tensors="pt")
98
  input_ids = inputs["input_ids"].to(device)
@@ -119,9 +141,7 @@ def evaluate(
119
  g = gr.Interface(
120
  fn=evaluate,
121
  inputs=[
122
- gr.components.Textbox(
123
- lines=2, label="Instruction", placeholder="Tell me about alpacas."
124
- ),
125
  gr.components.Textbox(lines=2, label="Input", placeholder="none"),
126
  gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
127
  gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
 
3
  import transformers
4
  import gradio as gr
5
 
6
+ import whisper
7
+ model1 = whisper.load_model("small")
8
+
9
  assert (
10
  "LlamaTokenizer" in transformers._import_structure["models.llama"]
11
  ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
 
87
 
88
 
89
  def evaluate(
90
+ # instruction,
91
+ audio,
92
  input=None,
93
  temperature=0.1,
94
  top_p=0.75,
 
97
  max_new_tokens=128,
98
  **kwargs,
99
  ):
100
+
101
+ # load audio and pad/trim it to fit 30 seconds
102
+ audio = whisper.load_audio(audio)
103
+ audio = whisper.pad_or_trim(audio)
104
+
105
+ # make log-Mel spectrogram and move to the same device as the model
106
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
107
+
108
+ # detect the spoken language
109
+ _, probs = model.detect_language(mel)
110
+ print(f"Detected language: {max(probs, key=probs.get)}")
111
+
112
+ # decode the audio
113
+ options = whisper.DecodingOptions()
114
+ result = whisper.decode(model, mel, options)
115
+
116
+ instruction = result.text
117
+
118
  prompt = generate_prompt(instruction, input)
119
  inputs = tokenizer(prompt, return_tensors="pt")
120
  input_ids = inputs["input_ids"].to(device)
 
141
  g = gr.Interface(
142
  fn=evaluate,
143
  inputs=[
144
+ gr.Audio(source="microphone", label = "请上传您喜欢的声音(wav文件)", type="filepath"),
 
 
145
  gr.components.Textbox(lines=2, label="Input", placeholder="none"),
146
  gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
147
  gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),