peteralexandercharles patrickvonplaten committed on
Commit
84b3438
0 Parent(s):

Duplicate from speechbox/whisper-restore-punctuation

Browse files

Co-authored-by: Patrick von Platen <patrickvonplaten@users.noreply.huggingface.co>

Files changed (7) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +46 -0
  4. common_voice_en_18301577.mp3 +0 -0
  5. requirements.txt +5 -0
  6. sample1.flac +0 -0
  7. sample2.flac +0 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Whisper Restore Punctuation
3
+ emoji: 👀
4
+ colorFrom: purple
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.15.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ duplicated_from: speechbox/whisper-restore-punctuation
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from speechbox import PunctuationRestorer
2
+ import librosa
3
+ import subprocess
4
+ import gradio as gr
5
+
6
# Module-level restorer built from the "openai/whisper-tiny.en" checkpoint; restore() below calls it for every request.
+ restorer = PunctuationRestorer.from_pretrained("openai/whisper-tiny.en")
7
+
8
+
9
def convert_to_wav(path):
    """Convert an audio file to WAV with ffmpeg unless it already is one.

    Args:
        path: Filesystem path of the input audio file.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` points at the WAV
        file (the input itself when it already carries a ``.wav``
        extension, in any letter case) and ``error`` is ``None``. On
        failure ``path`` is the unchanged input path and ``error`` is a
        human-readable message.
    """
    # Function-scope import so this block does not depend on edits to the
    # file's import section.
    import os

    # splitext only splits on a dot in the final path component, so inputs
    # without an extension or with dots in a directory name stay intact.
    stem, extension = os.path.splitext(path)
    if extension.lower() == '.wav':
        return path, None

    new_path = stem + '.wav'
    try:
        # '-y' goes before the output file so it is not a trailing option.
        # check=True turns a non-zero ffmpeg exit status into an exception
        # instead of silently handing back a file that was never written.
        subprocess.run(
            ['ffmpeg', '-y', '-i', path, new_path],
            check=True,
            stdin=subprocess.DEVNULL,
        )
    except (OSError, subprocess.CalledProcessError):
        # OSError: ffmpeg is missing or not executable.
        # CalledProcessError: ffmpeg ran but could not convert the input.
        return path, 'Error: Could not convert file to .wav'
    return new_path, None
18
+
19
+
20
def restore(audio, original_transcript):
    """Restore punctuation in a transcript using the matching audio.

    Args:
        audio: Path of the uploaded audio file.
        original_transcript: Transcript text to be punctuated.

    Returns:
        The ``(text, log_probs)`` pair produced by the module-level
        ``restorer``.
    """
    path, error = convert_to_wav(audio)
    if error is not None:
        # Only log real failures; on failure ``path`` is still the original
        # file, so loading is attempted on it as a best effort.
        print(error)

    # Load the audio at the 16 kHz sampling rate passed on to the restorer.
    data, samplerate = librosa.load(path, sr=16_000)

    text, log_probs = restorer(data, original_transcript, samplerate, num_beams=1)
    return text, log_probs
28
+
29
+
30
# Build and launch the demo UI. Components come from the top-level gradio
# namespace throughout (as gr.Number already did) instead of the deprecated
# gr.inputs / gr.outputs modules.
gr.Interface(
    title='Punctuation Restorer',
    fn=restore,
    inputs=[
        gr.Audio(source="upload", type="filepath"),
        gr.Textbox(value="", label="normalized text"),
    ],
    outputs=[
        gr.Textbox(label='Restored text'),
        gr.Number(label='Log probability'),
    ],
    # Each example pairs a bundled audio file with its unpunctuated transcript.
    examples=[
        ["./common_voice_en_18301577.mp3", "do not cross the yellow light"],
        ["./sample1.flac", "going along slushy country roads and speaking to damp audiences in draughty school rooms day after day for a fortnight he'll have to put in an appearance at some place of worship on sunday morning and he can come to us immediately afterwards"],
        ["./sample2.flac", "before he had time to answer a much encumbered vera burst into the room with the question i say can i leave these here these were a small black pig and a lusty specimen of black red game cock"],
    ],
).launch()
common_voice_en_18301577.mp3 ADDED
Binary file (19.1 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers>=4.25.0
2
+ torch
3
+ speechbox>=0.1.2
4
+ librosa
5
+ accelerate
sample1.flac ADDED
Binary file (282 kB). View file
 
sample2.flac ADDED
Binary file (278 kB). View file