pknayak committed on
Commit
3295d39
1 Parent(s): 3bb42a8

Update app.py

Browse files

adding the actual code and testing it on the spaces

Files changed (1) hide show
  1. app.py +65 -2
app.py CHANGED
@@ -1,6 +1,67 @@
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  def upload_file(files): # the actual translation should happen here
5
  """
6
  takes the file that comes from the UI and converts it to the respective
@@ -11,8 +72,9 @@ def upload_file(files): # the actual translation should happen here
11
 
12
  # Define the data as lists. Here the data would be sent in the form of a single row from the data
13
 
14
- data = [ ["ca_1.wav", "बिना किसी पूर्व सूचना के विलंबित या रद्द की गई ट्रिनिक", "without any prior information or any delay or delay in the train journey", "hindi"]]
15
 
 
16
 
17
  # Create the DataFrame outputs
18
  if data is not None:
@@ -35,10 +97,11 @@ with gr.Blocks() as demo:
35
  datatype=["str", "str", "str"],
36
  row_count=1,
37
  col_count=(4, "fixed"),
 
38
  )
39
  upload_button.upload(upload_file, upload_button, df_output, show_progress = True) # upload the audio file and and sends to the upload function
40
 
41
 
42
 
43
  if __name__ == "__main__":
44
- demo.launch()
 
1
+
2
+ # imports for
3
+ from datasets import Audio, load_dataset, Dataset
4
+ import torch
5
+ from transformers import pipeline
6
+
7
+ from pathlib import Path
8
+
9
+ # for the UI by Gradio
10
  import gradio as gr
11
  import pandas as pd
12
 
13
+
14
# Pick the inference device and the matching dtype: half precision on the
# first CUDA GPU when torch reports one, full precision on the CPU otherwise.
DEVICE, TORCH_DTYPE = (
    ("cuda:0", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

# Checkpoint handed to the speech-recognition pipeline below.
MODEL_NAME = 'openai/whisper-large-v3'

# Built once at import time; v2t_convertor reuses this object for every call.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=DEVICE,
    torch_dtype=TORCH_DTYPE,
)
31
+
32
+
33
+
34
+
35
def convert_audio_2_array(files):
    """Wrap one uploaded audio file in a single-row ``datasets.Dataset``.

    Args:
        files: The uploaded file, given either as a path (``str`` or
            ``pathlib.Path``) or as an object that exposes its path through
            a ``name`` attribute (for example a temporary-file wrapper).

    Returns:
        A ``(file_name, complaint_data)`` tuple: the base name of the file
        and a one-row dataset whose ``"audio"`` column has been cast to
        ``Audio(sampling_rate=16000)``.
    """
    # NOTE(review): depending on the Gradio version, the upload callback
    # appears to receive either a path string or a temp-file object; confirm
    # against the version pinned for this app. Both are normalised to a plain
    # path string so that Dataset.from_dict() and Path() get a value they
    # accept.
    if isinstance(files, (str, Path)):
        audio_path = str(files)
    else:
        audio_path = files.name

    complaint_data = Dataset.from_dict(
        {"audio": [audio_path]}
    ).cast_column("audio", Audio(sampling_rate=16000))

    file_name = Path(audio_path).name
    print(f"file_paths: \n {audio_path} and \n file_name: {file_name}and \n complaint_data : \n {complaint_data} ")
    return file_name, complaint_data
43
+
44
+
45
def v2t_convertor(files):
    """Run the speech pipeline on one uploaded file and build a result row.

    The module-level ``pipe`` is called twice on the same audio: once with
    ``task="transcribe"`` and once with ``task="translate"``.

    Args:
        files: The uploaded audio file, forwarded unchanged to
            ``convert_audio_2_array``.

    Returns:
        A one-row table of the form
        ``[[file_name, org_complain, eng_complain, language_code]]``.
    """
    file_name, complaint_data = convert_audio_2_array(files)

    # Only a single uploaded file is handled, so take the first row.
    sample = complaint_data[0]

    # Each pipeline call receives its own copy of the audio dict.
    # NOTE(review): presumably because the pipeline mutates the dict it is
    # given; confirm before removing the copies.
    org_complain_dict = pipe(
        sample["audio"].copy(),
        max_new_tokens=256,
        generate_kwargs={"task": "transcribe"},
    )
    # Debug output of the raw transcription result. The f-prefix used to sit
    # inside the quotes, which printed the placeholder text literally.
    print(f"{org_complain_dict}")
    org_complain = org_complain_dict["text"]

    # TODO: detect the spoken language from the model output instead of
    # hard-coding it; an earlier attempt decoded the language token from
    # pipe.model.generate(...).
    language_code = "hi"

    eng_complain = pipe(
        sample["audio"].copy(),
        max_new_tokens=256,
        generate_kwargs={"task": "translate"},
    )["text"]

    print(f"org_complain: \n {org_complain} \nand\n eng_complain:\n {eng_complain} \n language_code: {language_code}")

    return [[file_name, org_complain, eng_complain, language_code]]
62
+
63
+
64
+
65
  def upload_file(files): # the actual translation should happen here
66
  """
67
  takes the file that comes from the UI and converts it to the respective
 
72
 
73
  # Define the data as lists. Here the data would be sent in the form of a single row from the data
74
 
75
+ # data = [ ["ca_1.wav", "बिना किसी पूर्व सूचना के विलंबित या रद्द की गई ट्रिनिक", "without any prior information or any delay or delay in the train journey", "hindi"]]
76
 
77
+ data = v2t_convertor(files)
78
 
79
  # Create the DataFrame outputs
80
  if data is not None:
 
97
  datatype=["str", "str", "str"],
98
  row_count=1,
99
  col_count=(4, "fixed"),
100
+ wrap=True
101
  )
102
  upload_button.upload(upload_file, upload_button, df_output, show_progress = True) # upload the audio file and and sends to the upload function
103
 
104
 
105
 
106
  if __name__ == "__main__":
107
+ demo.launch(debug=True)