camparchimedes committed on
Commit
14c8f51
•
1 Parent(s): fbecba6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -7,9 +7,11 @@ from transformers import WhisperTokenizer, WhisperForConditionalGeneration, Whis
7
  import soundfile as sf
8
  import ffmpeg
9
  import os
 
10
  from huggingface_hub import InferenceClient
11
  from gradio_client import Client, file
12
  import spaces
 
13
 
14
  warnings.filterwarnings("ignore")
15
 
@@ -30,9 +32,9 @@ def convert_audio_format(audio_path):
30
  ffmpeg.input(audio_path).output(output_path, format='wav', ar='16000').run(overwrite_output=True)
31
  return output_path
32
 
33
-
34
  @spaces.GPU(duration=120, queue=False)
35
  def transcribe_audio(audio_file, batch_size=4):
 
36
  audio_path = convert_audio_format(audio_file)
37
  audio_input, sample_rate = sf.read(audio_path)
38
  chunk_size = 16000 * 28 # 28 seconds chunks
@@ -47,40 +49,38 @@ def transcribe_audio(audio_file, batch_size=4):
47
  with torch.no_grad():
48
  output = model.generate(
49
  inputs.input_features,
50
- max_length=1024,
51
  num_beams=7,
52
  attention_mask=attention_mask
53
  )
54
  transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
55
 
56
- return transcription.strip()
 
 
 
 
 
 
57
 
58
  # HTML
59
  banner_html = """
60
  <div style="text-align: center;">
61
  <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="87%" height="auto">
62
  </div>
 
 
63
  <div style="text-align: center; margin-top: 20px;">
64
- <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.jpg" alt="picture" width="50%" height="auto">
65
  </div>
66
  """
67
 
68
- images_path = os.path.dirname(__file__)
69
- IMAGES = [
70
- [
71
- {
72
- "text": "What usual stuff happens in this image? :)",
73
- "files": [f"{images_path}/500x_picture.png"],
74
- }
75
- ]
76
- ]
77
-
78
  # Gradio interface
79
  iface = gr.Blocks()
80
 
81
  with iface:
82
  gr.HTML(banner_html)
83
- gr.Markdown("# 𝐍𝐯𝐢𝐝𝐢𝐚 𝐀𝟏𝟎𝟎 👋🏼👾🦾⚡ @{NbAiLab/whisper-norwegian-medium}\nUpload audio file:☕")
84
  audio_input = gr.Audio(type="filepath")
85
  batch_size_input = gr.Slider(minimum=1, maximum=16, step=1, label="Batch Size")
86
  transcription_output = gr.Textbox()
 
7
  import soundfile as sf
8
  import ffmpeg
9
  import os
10
+ from PIL import Image
11
  from huggingface_hub import InferenceClient
12
  from gradio_client import Client, file
13
  import spaces
14
+ import time
15
 
16
  warnings.filterwarnings("ignore")
17
 
 
32
  ffmpeg.input(audio_path).output(output_path, format='wav', ar='16000').run(overwrite_output=True)
33
  return output_path
34
 
 
35
  @spaces.GPU(duration=120, queue=False)
36
  def transcribe_audio(audio_file, batch_size=4):
37
+ start_time = time.time()
38
  audio_path = convert_audio_format(audio_file)
39
  audio_input, sample_rate = sf.read(audio_path)
40
  chunk_size = 16000 * 28 # 28 seconds chunks
 
49
  with torch.no_grad():
50
  output = model.generate(
51
  inputs.input_features,
52
+ max_length=2048,
53
  num_beams=7,
54
  attention_mask=attention_mask
55
  )
56
  transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
57
 
58
+ end_time = time.time()
59
+ transcription_time = end_time - start_time
60
+ word_count = len(transcription.split())
61
+
62
+ result = f"Transcription: {transcription.strip()}\n\nTime taken: {transcription_time:.2f} seconds\nNumber of words: {word_count}"
63
+
64
+ return result
65
 
66
  # HTML
67
  banner_html = """
68
  <div style="text-align: center;">
69
  <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="87%" height="auto">
70
  </div>
71
+ """
72
+ image_html = """
73
  <div style="text-align: center; margin-top: 20px;">
74
+ <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/blob/main/500x_picture.png" alt="picture" width="50%" height="auto">
75
  </div>
76
  """
77
 
 
 
 
 
 
 
 
 
 
 
78
  # Gradio interface
79
  iface = gr.Blocks()
80
 
81
  with iface:
82
  gr.HTML(banner_html)
83
+ gr.Markdown("# 𝐍𝐯𝐢𝐝𝐢𝐚 𝐀𝟏𝟎𝟎 👋🏼👾🦾⚡ @{NbAiLab/whisper-norwegian-medium}\nUpload audio file: ☕")
84
  audio_input = gr.Audio(type="filepath")
85
  batch_size_input = gr.Slider(minimum=1, maximum=16, step=1, label="Batch Size")
86
  transcription_output = gr.Textbox()