Adeenakk committed on
Commit
2d5eff6
1 Parent(s): 771bfb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -21
app.py CHANGED
@@ -6,10 +6,14 @@ import requests
6
  from gtts import gTTS
7
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
8
  from pydub import AudioSegment
 
9
 
10
- # Load environment variables for API keys
11
- RAPIDAPI_KEY = os.getenv('RAPIDAPI_LANG_TRANS')
12
- GROQ_API_KEY = os.getenv('GROQ_API_KEY')
 
 
 
13
 
14
  # Load the Whisper model
15
  processor = AutoProcessor.from_pretrained("ihanif/whisper-medium-urdu")
@@ -43,10 +47,21 @@ def process_audio(file_path):
43
 
44
  # Convert audio to numpy array for processing
45
  audio_samples = np.array(audio.get_array_of_samples(), dtype=np.float32) / 32768.0 # Normalize to [-1, 1] range
 
 
 
 
 
 
 
46
  audio_input = processor(audio_samples, return_tensors="pt", sampling_rate=16000)
47
 
48
  # Transcribe the audio using the fine-tuned Whisper model
49
- result = model.generate(**audio_input)
 
 
 
 
50
  text = processor.batch_decode(result, skip_special_tokens=True)[0]
51
 
52
  if not text.strip(): # Check if the transcribed text is empty
@@ -58,22 +73,15 @@ def process_audio(file_path):
58
  urdu_to_eng = translate("en", text)
59
  print(f"Translated Text (English): {urdu_to_eng}") # Debugging step
60
 
61
- # Make API call to Groq
62
- groq_url = "https://api.groq.com/your-endpoint" # Replace with actual Groq API endpoint
63
- groq_headers = {
64
- "Authorization": f"Bearer {GROQ_API_KEY}",
65
- "Content-Type": "application/json"
66
- }
67
- groq_payload = {
68
- "messages": [{"role": "user", "content": urdu_to_eng}],
69
- "model": "llama3-8b-8192", # Adjust model if needed
70
- "max_tokens": 50
71
- }
72
- response = requests.post(groq_url, json=groq_payload, headers=groq_headers)
73
- chat_completion = response.json()
74
-
75
- # Access the response
76
- response_message = chat_completion["choices"][0]["message"]["content"].strip()
77
  print(f"Groq Response (English): {response_message}") # Debugging step
78
 
79
  # Translate the response text back to Urdu
@@ -104,4 +112,4 @@ iface = gr.Interface(
104
  live=True
105
  )
106
 
107
- iface.launch()
 
6
  from gtts import gTTS
7
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
8
  from pydub import AudioSegment
9
+ from groq import Groq
10
 
11
+ from google.colab import userdata
12
+ RAPIDAPI_KEY = userdata.get('RAPIDAPI_LANG_TRANS')
13
+ GROQ_API_KEY = userdata.get('GROQ_API_KEY')
14
+
15
+ # Initialize the Groq client
16
+ client = Groq(api_key=GROQ_API_KEY)
17
 
18
  # Load the Whisper model
19
  processor = AutoProcessor.from_pretrained("ihanif/whisper-medium-urdu")
 
47
 
48
  # Convert audio to numpy array for processing
49
  audio_samples = np.array(audio.get_array_of_samples(), dtype=np.float32) / 32768.0 # Normalize to [-1, 1] range
50
+
51
+ # Create attention mask
52
+ # Assume padding length is determined by the maximum length of sequences
53
+ # For simplicity, we'll just create a mask where all values are 1 (no padding)
54
+ # In practice, you would adjust this based on actual sequence length
55
+ attention_mask = np.ones_like(audio_samples, dtype=np.int64)
56
+
57
  audio_input = processor(audio_samples, return_tensors="pt", sampling_rate=16000)
58
 
59
  # Transcribe the audio using the fine-tuned Whisper model
60
+ # Pass the attention mask as well
61
+ result = model.generate(
62
+ **audio_input,
63
+ attention_mask=torch.tensor(attention_mask).unsqueeze(0) # Add batch dimension
64
+ )
65
  text = processor.batch_decode(result, skip_special_tokens=True)[0]
66
 
67
  if not text.strip(): # Check if the transcribed text is empty
 
73
  urdu_to_eng = translate("en", text)
74
  print(f"Translated Text (English): {urdu_to_eng}") # Debugging step
75
 
76
+ # Generate a response using Groq
77
+ chat_completion = client.chat.completions.create(
78
+ messages=[{"role": "user", "content": urdu_to_eng}],
79
+ model="llama3-8b-8192", # Ensure the model supports Urdu if possible
80
+ max_tokens=50
81
+ )
82
+
83
+ # Access the response using dot notation
84
+ response_message = chat_completion.choices[0].message.content.strip()
 
 
 
 
 
 
 
85
  print(f"Groq Response (English): {response_message}") # Debugging step
86
 
87
  # Translate the response text back to Urdu
 
112
  live=True
113
  )
114
 
115
+ iface.launch(share=True)