Spaces:
Sleeping
Sleeping
Update app.py
Browse files — adding the actual code and testing it on the Spaces
app.py
CHANGED
@@ -1,6 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
def upload_file(files): # the actual translation should happen here
|
5 |
"""
|
6 |
takes the file that comes from the UI and converts it to the respective
|
@@ -11,8 +72,9 @@ def upload_file(files): # the actual translation should happen here
|
|
11 |
|
12 |
# Define the data as lists. Here the data would be sent in the form of a single record from the dataset
|
13 |
|
14 |
-
data = [ ["ca_1.wav", "बिना किसी पूर्व सूचना के विलंबित या रद्द की गई ट्रिनिक", "without any prior information or any delay or delay in the train journey", "hindi"]]
|
15 |
|
|
|
16 |
|
17 |
# Create the DataFrame outputs
|
18 |
if data is not None:
|
@@ -35,10 +97,11 @@ with gr.Blocks() as demo:
|
|
35 |
datatype=["str", "str", "str"],
|
36 |
row_count=1,
|
37 |
col_count=(4, "fixed"),
|
|
|
38 |
)
|
39 |
upload_button.upload(upload_file, upload_button, df_output, show_progress = True) # upload the audio file and and sends to the upload function
|
40 |
|
41 |
|
42 |
|
43 |
if __name__ == "__main__":
|
44 |
-
demo.launch()
|
|
|
1 |
+
|
2 |
+
# imports for
|
3 |
+
from datasets import Audio, load_dataset, Dataset
|
4 |
+
import torch
|
5 |
+
from transformers import pipeline
|
6 |
+
|
7 |
+
from pathlib import Path
|
8 |
+
|
9 |
+
# for the UI by Gradio
|
10 |
import gradio as gr
|
11 |
import pandas as pd
|
12 |
|
13 |
+
|
14 |
+
# initializing the values for device
|
15 |
+
if torch.cuda.is_available():
|
16 |
+
DEVICE = "cuda:0"
|
17 |
+
TORCH_DTYPE = torch.float16
|
18 |
+
else:
|
19 |
+
DEVICE = "cpu"
|
20 |
+
TORCH_DTYPE = torch.float32
|
21 |
+
|
22 |
+
|
23 |
+
MODEL_NAME = 'openai/whisper-large-v3'
|
24 |
+
|
25 |
+
pipe = pipeline(
|
26 |
+
"automatic-speech-recognition",
|
27 |
+
model=MODEL_NAME,
|
28 |
+
torch_dtype=TORCH_DTYPE,
|
29 |
+
device=DEVICE,
|
30 |
+
)
|
31 |
+
|
32 |
+
|
33 |
+
|
34 |
+
|
35 |
+
def convert_audio_2_array(files):
|
36 |
+
file_paths = files
|
37 |
+
complaint_data = Dataset.from_dict({
|
38 |
+
"audio" : [file_paths] # path to be changed based on the path
|
39 |
+
}).cast_column("audio",Audio(sampling_rate=16000))
|
40 |
+
file_name = Path(file_paths).name
|
41 |
+
print(f"file_paths: \n {file_paths} and \n file_name: {file_name}and \n complaint_data : \n {complaint_data} ")
|
42 |
+
return file_name, complaint_data
|
43 |
+
|
44 |
+
|
45 |
+
def v2t_convertor(files):
|
46 |
+
file_name,inputs_dict = convert_audio_2_array(files)
|
47 |
+
|
48 |
+
input = inputs_dict[0] # selecting only one input
|
49 |
+
org_complain_dict = pipe(input["audio"].copy(), max_new_tokens=256, generate_kwargs={"task": "transcribe"})
|
50 |
+
print('f{org_complain_dict}')
|
51 |
+
org_complain = org_complain_dict['text']
|
52 |
+
|
53 |
+
# lang_token = pipe.model.generate(input, max_new_tokens=1)[0,1]
|
54 |
+
# language_code = pipe.tokenizer.decode(lang_token)
|
55 |
+
language_code = "hi" # hard coded for the time being
|
56 |
+
|
57 |
+
eng_complain = pipe(input["audio"].copy(), max_new_tokens=256, generate_kwargs={"task": "translate"})['text']
|
58 |
+
|
59 |
+
print(f"org_complain: \n {org_complain} \nand\n eng_complain:\n {eng_complain} \n language_code: {language_code}")
|
60 |
+
|
61 |
+
return [[file_name, org_complain, eng_complain, language_code]]
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
def upload_file(files): # the actual translation should happen here
|
66 |
"""
|
67 |
takes the file that comes from the UI and converts it to the respective
|
|
|
72 |
|
73 |
# Define the data as lists. Here the data would be sent in the form of a single record from the dataset
|
74 |
|
75 |
+
# data = [ ["ca_1.wav", "बिना किसी पूर्व सूचना के विलंबित या रद्द की गई ट्रिनिक", "without any prior information or any delay or delay in the train journey", "hindi"]]
|
76 |
|
77 |
+
data = v2t_convertor(files)
|
78 |
|
79 |
# Create the DataFrame outputs
|
80 |
if data is not None:
|
|
|
97 |
datatype=["str", "str", "str"],
|
98 |
row_count=1,
|
99 |
col_count=(4, "fixed"),
|
100 |
+
wrap=True
|
101 |
)
|
102 |
upload_button.upload(upload_file, upload_button, df_output, show_progress = True) # upload the audio file and and sends to the upload function
|
103 |
|
104 |
|
105 |
|
106 |
if __name__ == "__main__":
|
107 |
+
demo.launch(debug=True)
|