camparchimedes committed · Commit ad6d7c2 · Parent(s): 071df52
Update app.py
app.py
CHANGED
@@ -13,27 +13,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #---------------------------------------------------------------------------------------------------------------------------------------------
-
-
 import gradio as gr
 from PIL import Image
 from pydub import AudioSegment
 import os
 import re
-import warnings
 import time
-import datetime
+import warnings
+#import datetime
 import subprocess
 from pathlib import Path
 from fpdf import FPDF
 
-
+import psutil
 from gpuinfo import GPUInfo
-import pandas as pd
+#import pandas as pd
+#import csv
 import numpy as np
 import torch
-import torchaudio
-import torchaudio.transforms as transforms
+#import torchaudio
+#import torchaudio.transforms as transforms
 
 from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 
@@ -41,21 +40,20 @@ import spacy
 import networkx as nx
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
-
-
-
+#---------------------------------------------------------------------------------------------------------------------------------------------
+warnings.filterwarnings("ignore")
 
 
 HEADER_INFO = """
 # WEB APP ✨| Norwegian WHISPER Model
 Switch Work [Transkribering av lydfiler til norsk skrift]
 """.strip()
-LOGO = "https://huggingface.co/spaces/camparchimedes/transcription_app/
+LOGO = "https://huggingface.co/spaces/camparchimedes/transcription_app/resolve/main/pic09w9678yhit.png"
 SIDEBAR_INFO = f"""
-<div align=center>
-<img src="{LOGO}" style="width:
-
-
+<div align="center">
+<img src="{LOGO}" style="width: 100%; height: auto;"/>
+</div>
+"""
 
 def convert_to_wav(filepath):
     _,file_ending = os.path.splitext(f'{filepath}')
@@ -63,13 +61,6 @@ def convert_to_wav(filepath):
     os.system(f'ffmpeg -i "{filepath}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
     return audio_file
 
-#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-#def convert_to_wav(audio_file):
-#audio = AudioSegment.from_file(audio_file, format="m4a")
-#wav_file = "temp.wav"
-#audio.export(wav_file, format="wav")
-#return wav_file
-
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 pipe = pipeline(
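This hunk drops the commented-out pydub converter and keeps the os.system-based one. For reference, a minimal sketch of the same conversion done with subprocess.run instead of os.system; the output filename derivation and the error handling here are assumptions, not part of the commit:

import os
import subprocess

def convert_to_wav(filepath):
    # Assumed naming scheme: write the WAV next to the input file.
    base, _ = os.path.splitext(filepath)
    audio_file = f"{base}.wav"
    # Same ffmpeg flags as the kept line: 16 kHz, mono, 16-bit PCM.
    subprocess.run(
        ["ffmpeg", "-y", "-i", filepath,
         "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le", audio_file],
        check=True,  # raise CalledProcessError if ffmpeg exits non-zero
    )
    return audio_file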
@@ -86,16 +77,12 @@ def transcribe_audio(audio_file, batch_size=10):
     start_time = time.time()
 
     outputs = pipe(audio_file, batch_size=batch_size, return_timestamps=False, generate_kwargs={'task': 'transcribe', 'language': 'no'}) # skip_special_tokens=True
-    #options = dict(language=selected_source_lang, beam_size=3, best_of=3)
-    #transcribe_options = dict(task="transcribe", **options)
-    #result = model.transcribe(file, **transcribe_options)
     text = outputs["text"]
 
     end_time = time.time()
+
     output_time = end_time - start_time
     word_count = len(text.split())
-
-
     memory = psutil.virtual_memory()
     gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
     gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
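The pipe(...) call above is the only place the pipeline appears in this hunk; its construction (the truncated "pipe = pipeline(" context line) is not shown in full. A minimal sketch of how such an ASR pipeline is typically built and called with these generate_kwargs; the checkpoint and chunk_length_s below are stand-ins, not the Space's actual configuration:

import torch
from transformers import pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# Stand-in checkpoint; the Space's actual Norwegian Whisper model id is not visible in this diff.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    chunk_length_s=30,  # assumption: long recordings are chunked
    device=device,
)

outputs = pipe(
    "audio.wav",
    batch_size=10,
    return_timestamps=False,
    generate_kwargs={"task": "transcribe", "language": "no"},
)
print(outputs["text"])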
@@ -106,7 +93,6 @@ def transcribe_audio(audio_file, batch_size=10):
 *Number of words: {word_count}*
 *GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}*"""
 
-
     return text.strip(), system_info
 #:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 
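The lines between the two transcribe_audio hunks are not shown; they presumably assemble the system_info string whose last two lines are visible above. A sketch of what that assembly plausibly looks like, using the psutil and GPUInfo values computed in the previous hunk; everything except the two visible string lines is an assumption:

    # Values computed earlier in transcribe_audio (visible in the previous hunk).
    memory = psutil.virtual_memory()
    gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
    gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
    gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0

    # Assumed layout for the hidden lines; only the last two lines appear in the diff.
    system_info = f"""
*Processing time: {output_time:.2f} seconds*
*Memory in use: {memory.percent}%*
*Number of words: {word_count}*
*GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}*"""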
@@ -223,7 +209,7 @@ iface = gr.Blocks()
 
 with iface:
 
-    gr.
+    gr.HTML(SIDEBAR_INFO)
     gr.Markdown(HEADER_INFO)
 
     with gr.Tabs():
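The only change here is the gr.HTML(SIDEBAR_INFO) banner added at the top of the Blocks layout. A minimal sketch of how the surrounding "with iface:" block could wire the components; only gr.Blocks, gr.HTML, gr.Markdown, and gr.Tabs appear in the diff, so the tab, component names, and click wiring below are assumptions:

import gradio as gr

iface = gr.Blocks()

with iface:
    gr.HTML(SIDEBAR_INFO)        # logo banner added by this commit
    gr.Markdown(HEADER_INFO)

    with gr.Tabs():
        with gr.TabItem("Transkriber"):
            audio_in = gr.Audio(type="filepath", label="Last opp lydfil")
            text_out = gr.Textbox(label="Transkripsjon")
            info_out = gr.Markdown()
            run_btn = gr.Button("Kjør")
            run_btn.click(
                fn=transcribe_audio,  # defined earlier in app.py
                inputs=audio_in,
                outputs=[text_out, info_out],
            )

iface.launch()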