camparchimedes committed on
Commit
64e12f4
1 Parent(s): dda0718

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -23,7 +23,7 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
23
  torch_dtype = torch.float32
24
 
25
  # ASR pipeline
26
- pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large-semantic", device=device, torch_dtype=torch.float32)
27
 
28
  # Switch m4a to wav
29
  def convert_to_wav(audio_file):
@@ -35,7 +35,7 @@ def convert_to_wav(audio_file):
35
  # @spaces.GPU(queue=True)
36
 
37
 
38
- # Transcription funct.@ASR pipeline
39
  def transcribe_audio(audio_file):
40
  if audio_file.endswith(".m4a"):
41
  audio_file = convert_to_wav(audio_file)
@@ -43,7 +43,7 @@ def transcribe_audio(audio_file):
43
  start_time = time.time()
44
 
45
  with torch.no_grad():
46
- output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8, "task": "transcribe", "language": "no"})
47
 
48
  transcription = output["text"]
49
  end_time = time.time()
@@ -82,7 +82,7 @@ def summarize_text(text):
82
 
83
 
84
  # HTML syntax for imagery
85
- image_html = """
86
  <div style="text-align: center;">
87
  <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.png" alt="Banner" width="85%" height="auto">
88
  </div>
@@ -107,7 +107,7 @@ def save_to_pdf(transcription, summary):
107
  pdf.output(pdf_output_path)
108
  return pdf_output_path
109
 
110
- # Gradio
111
  iface = gr.Interface(
112
  fn=transcribe_audio,
113
  inputs=gr.Audio(type="filepath"),
@@ -120,7 +120,7 @@ iface = gr.Interface(
120
  iface = gr.Blocks()
121
 
122
  with iface:
123
- gr.HTML(image_html)
124
  gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
125
 
126
  with gr.Tabs():
@@ -197,4 +197,4 @@ with iface:
197
 
198
 
199
  # run
200
- iface.launch(share=True, debug=True)
 
23
  torch_dtype = torch.float32
24
 
25
  # ASR pipeline
26
+ asr = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large-semantic", device=device, torch_dtype=torch_dtype)
27
 
28
  # Switch m4a to wav
29
  def convert_to_wav(audio_file):
 
35
  # @spaces.GPU(queue=True)
36
 
37
 
38
+ # Transcription funct.@ASR
39
  def transcribe_audio(audio_file):
40
  if audio_file.endswith(".m4a"):
41
  audio_file = convert_to_wav(audio_file)
 
43
  start_time = time.time()
44
 
45
  with torch.no_grad():
46
+ output = asr(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8, "task": "transcribe", "language": "no"})
47
 
48
  transcription = output["text"]
49
  end_time = time.time()
 
82
 
83
 
84
  # HTML syntax for imagery
85
+ banner_html = """
86
  <div style="text-align: center;">
87
  <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.png" alt="Banner" width="85%" height="auto">
88
  </div>
 
107
  pdf.output(pdf_output_path)
108
  return pdf_output_path
109
 
110
+ # Gradio
111
  iface = gr.Interface(
112
  fn=transcribe_audio,
113
  inputs=gr.Audio(type="filepath"),
 
120
  iface = gr.Blocks()
121
 
122
  with iface:
123
+ gr.HTML(banner_html)
124
  gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
125
 
126
  with gr.Tabs():
 
197
 
198
 
199
  # run
200
+ iface.launch(share=True, debug=True)