Spaces:

pknayak
/

bhashini_techathon

Sleeping

App Files Files Community

pknayak committed on Aug 21

Commit

3295d39

•

1 Parent(s): 3bb42a8

Update app.py

Browse files

adding the actual code and testing it on the spaces

Files changed (1) hide show

app.py +65 -2

app.py CHANGED Viewed

@@ -1,6 +1,67 @@
 import gradio as gr
 import pandas as pd
 def upload_file(files): # the actual translation should happen here
     """
     takes the file that comes from the UI and converts it to the respective
@@ -11,8 +72,9 @@ def upload_file(files): # the actual translation should happen here
     # Define the data as lists. Here the data would be sent in the form of a single record from the data
-    data = [ ["ca_1.wav", "बिना किसी पूर्व सूचना के विलंबित या रद्द की गई ट्रिनिक", "without any prior information or any delay or delay in the train journey", "hindi"]]
     # Create the DataFrame outputs
     if data is not None:
@@ -35,10 +97,11 @@ with gr.Blocks() as demo:
             datatype=["str", "str", "str"],
             row_count=1,
             col_count=(4, "fixed"),
     )
   upload_button.upload(upload_file, upload_button, df_output, show_progress = True) # upload the audio file and and sends to the upload function
 if __name__ == "__main__":
-    demo.launch()

+# imports for audio loading and the speech-recognition pipeline
+from datasets import Audio, load_dataset, Dataset
+import torch
+from transformers import pipeline
+from pathlib import Path
+# for the UI by Gradio
 import gradio as gr
 import pandas as pd
+# initializing the values for device
+if torch.cuda.is_available():
+    DEVICE = "cuda:0"
+    TORCH_DTYPE = torch.float16
+else:
+    DEVICE = "cpu"
+    TORCH_DTYPE = torch.float32
+MODEL_NAME = 'openai/whisper-large-v3'
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=MODEL_NAME,
+    torch_dtype=TORCH_DTYPE,
+    device=DEVICE,
+)
+def convert_audio_2_array(files):
+  file_paths = files
+  complaint_data = Dataset.from_dict({
+      "audio" :  [file_paths] # path to be changed based on the path
+      }).cast_column("audio",Audio(sampling_rate=16000))
+  file_name = Path(file_paths).name
+  print(f"file_paths: \n {file_paths} and \n file_name: {file_name}and \n complaint_data : \n {complaint_data} ")
+  return file_name, complaint_data
+def v2t_convertor(files):
+  file_name,inputs_dict =  convert_audio_2_array(files)
+  input = inputs_dict[0] # selecting only one input
+  org_complain_dict = pipe(input["audio"].copy(), max_new_tokens=256, generate_kwargs={"task": "transcribe"})
+  print('f{org_complain_dict}')
+  org_complain = org_complain_dict['text']
+  # lang_token = pipe.model.generate(input, max_new_tokens=1)[0,1]
+  # language_code = pipe.tokenizer.decode(lang_token)
+  language_code = "hi" # hard coded for the time being
+  eng_complain = pipe(input["audio"].copy(), max_new_tokens=256, generate_kwargs={"task": "translate"})['text']
+  print(f"org_complain: \n {org_complain} \nand\n eng_complain:\n {eng_complain} \n language_code: {language_code}")
+  return [[file_name, org_complain, eng_complain, language_code]]
 def upload_file(files): # the actual translation should happen here
     """
     takes the file that comes from the UI and converts it to the respective
     # Define the data as lists. Here the data would be sent in the form of a single record from the data
+    # data = [ ["ca_1.wav", "बिना किसी पूर्व सूचना के विलंबित या रद्द की गई ट्रिनिक", "without any prior information or any delay or delay in the train journey", "hindi"]]
+    data = v2t_convertor(files)
     # Create the DataFrame outputs
     if data is not None:
             datatype=["str", "str", "str"],
             row_count=1,
             col_count=(4, "fixed"),
+            wrap=True
     )
   upload_button.upload(upload_file, upload_button, df_output, show_progress = True) # upload the audio file and and sends to the upload function
 if __name__ == "__main__":  # launch the Blocks app only when this file is run as a script
+    demo.launch(debug=True)