File size: 2,169 Bytes
203873a
 
 
3ec3c4a
 
 
 
 
 
7b31438
203873a
3ec3c4a
 
 
 
203873a
3ec3c4a
 
 
 
 
 
 
 
 
 
 
 
25d12aa
 
 
 
 
 
3ec3c4a
 
 
 
25d12aa
 
203873a
 
 
3ec3c4a
 
 
 
25d12aa
 
3ec3c4a
 
25d12aa
203873a
3ec3c4a
95de7b4
25d12aa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import torch
from transformers import pipeline
import gradio as gr
import whisper

# Load the Whisper "base" speech-to-text model once at import time
# (downloads the weights on first run; kept module-level so both
# Gradio requests and helper functions reuse the same instance).
whisper_model = whisper.load_model("base")

# Text-classification pipeline for emotion recognition over the transcript.
# top_k=None makes the pipeline return scores for EVERY emotion label
# (a list of dicts per input), not just the single best label.
emotion_recognition = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=None)

# Speech-to-text helper backed by the module-level Whisper model.
def transcribe_audio(audio_file):
    """Run Whisper on *audio_file* (a filesystem path) and return the transcript text."""
    transcription_result = whisper_model.transcribe(audio_file)
    return transcription_result["text"]

# Combined pipeline step: speech -> text -> dominant emotion label.
def transcribe_and_recognize_emotions(audio_file):
    """Transcribe *audio_file* and classify the transcript's dominant emotion.

    Returns a ``(transcription, dominant_emotion_label)`` tuple.
    """
    text = transcribe_audio(audio_file)

    # Pipeline was built with top_k=None, so element 0 holds the full
    # per-label score list for this single input.
    label_scores = emotion_recognition(text)[0]

    # Pick the label carrying the highest confidence score.
    best = max(label_scores, key=lambda entry: entry['score'])

    return text, best['label']
    
# Stand-in for a real prosody / speech-pattern analysis step.
def analyze_speech_for_mental_health(audio_file):
    """Return a canned mental-health assessment string for *audio_file*.

    A real implementation would run an acoustic model over the audio
    (speech rate, prosody, pauses, etc.); this stub exists purely so the
    demo UI has a third output to display.
    """
    return "Simulated mental health analysis: No significant signs of depression or anxiety detected."

# Gradio callback: maps one audio input to the three UI outputs.
def gradio_transcription_emotion_interface(audio):
    """Produce (transcription, dominant emotion, mental-health note) for *audio*."""
    text, dominant_emotion = transcribe_and_recognize_emotions(audio)
    assessment = analyze_speech_for_mental_health(audio)
    return text, dominant_emotion, assessment

# Set up Gradio Interface: one audio input, three labelled outputs.
transcription_box = gr.Textbox(label="Transcription")
emotion_label = gr.Label(label="Dominant Emotion")
assessment_box = gr.Textbox(label="Mental Health Assessment")

iface = gr.Interface(
    fn=gradio_transcription_emotion_interface,
    inputs=gr.Audio(type="filepath"),
    outputs=[transcription_box, emotion_label, assessment_box],
    title="Audio Transcription and Emotion Recognition",
    description="Upload or record an audio file to get the transcription, recognize its dominant emotion, and receive a mental health assessment.",
)

# Deploy the interface (debug=True surfaces server errors in the console).
iface.launch(debug=True)