# Requires: pip install torch transformers gradio openai-whisper
import torch
from transformers import pipeline
import gradio as gr
import whisper

# Load the Whisper model for transcription
whisper_model = whisper.load_model("base")

# Load the emotion recognition pipeline
emotion_recognition = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
)


# Function to transcribe audio
def transcribe_audio(audio_file):
    result = whisper_model.transcribe(audio_file)
    return result["text"]


# Function to transcribe audio and recognize emotions
def transcribe_and_recognize_emotions(audio_file):
    # Transcribe audio
    transcription = transcribe_audio(audio_file)

    # Recognize emotions in the transcribed text; with top_k=None the
    # pipeline returns scores for every label
    emotions = emotion_recognition(transcription)

    # Extract the emotion with the highest score
    dominant_emotion = max(emotions[0], key=lambda x: x["score"])["label"]

    return transcription, dominant_emotion


# Simulated function to analyze speech patterns and prosody for mental health
def analyze_speech_for_mental_health(audio_file):
    # Here you would use a model or algorithm to analyze speech patterns,
    # prosody, etc. For demonstration purposes, we simulate this with a
    # placeholder response.
    return "Simulated mental health analysis: No significant signs of depression or anxiety detected."


# Define the Gradio interface function
def gradio_transcription_emotion_interface(audio):
    transcription, emotion = transcribe_and_recognize_emotions(audio)
    mental_health_assessment = analyze_speech_for_mental_health(audio)
    return transcription, emotion, mental_health_assessment


# Set up the Gradio interface
iface = gr.Interface(
    fn=gradio_transcription_emotion_interface,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Label(label="Dominant Emotion"),
        gr.Textbox(label="Mental Health Assessment"),
    ],
    title="Audio Transcription and Emotion Recognition",
    description=(
        "Upload or record an audio file to get the transcription, recognize "
        "its dominant emotion, and receive a mental health assessment."
    ),
)

# Launch the interface (debug=True surfaces errors in the console)
iface.launch(debug=True)
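
# ---------------------------------------------------------------------------
# Optional sketch: grounding the placeholder analysis in real prosodic
# features. This is illustrative only, under the assumption that librosa is
# installed (pip install librosa); the function name and feature set are
# hypothetical, and none of this constitutes a validated mental health
# assessment. If wired into the app, define this above
# analyze_speech_for_mental_health() and call it from there, since code
# placed after the blocking iface.launch() call is not reached while the
# server is running.
# ---------------------------------------------------------------------------
import numpy as np
import librosa


def extract_prosodic_features(audio_file):
    """Return coarse pitch and energy statistics for an audio file."""
    # Load the audio as mono at 16 kHz
    y, sr = librosa.load(audio_file, sr=16000)

    # Pitch track via pyin; unvoiced frames come back as NaN, hence the
    # nan-aware statistics below
    f0, _, _ = librosa.pyin(
        y, fmin=librosa.note_to_hz("C2"), fmax=librosa.note_to_hz("C7"), sr=sr
    )

    # Frame-level energy (root-mean-square amplitude)
    rms = librosa.feature.rms(y=y)[0]

    return {
        "pitch_mean_hz": float(np.nanmean(f0)),
        "pitch_variability_hz": float(np.nanstd(f0)),
        "energy_mean": float(rms.mean()),
        "energy_variability": float(rms.std()),
    }

# In a real system these features would feed a trained classifier rather than
# a hand-written heuristic: reduced pitch and energy variability are sometimes
# reported as correlates of depressed speech, but drawing such a conclusion
# requires validated models and clinical oversight.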