"""
IMMY on-device alpha version with TTS

A child-friendly chatbot interface with text-to-speech capabilities.
"""

import gradio as gr
from huggingface_hub import InferenceClient
from gtts import gTTS
import os
import tempfile
import logging
from typing import List, Dict, Generator, Tuple, Optional, Union

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class ImmyChatbot:
    def __init__(self, model_name: str = "Daemontatox/IMMY3"):
        self.client = InferenceClient(model_name)

    @staticmethod
    def text_to_speech(text: str) -> str:
        """
        Convert text to speech and return the path to the audio file.

        Args:
            text: The text to convert to speech

        Returns:
            str: Path to the generated audio file
        """
        try:
            tts = gTTS(text=text, lang='en')
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as fp:
                tts.save(fp.name)
                return fp.name
        except Exception as e:
            logger.error(f"Error in text_to_speech: {e}")
            raise

    def generate_response(
        self,
        message: str,
        history: List[Dict[str, str]],
        system_message: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
    ) -> Generator[Union[str, Tuple[str, str]], None, None]:
        """
        Generate chat response and audio.

        Args:
            message: User input message
            history: Chat history
            system_message: System prompt
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter

        Yields:
            Either a string (intermediate response) or a tuple of
            (final_response, audio_path)
        """
        messages = [{"role": "system", "content": system_message}]
        messages.extend(history)
        messages.append({"role": "user", "content": message})

        try:
            response = ""
            for chunk in self.client.chat_completion(
                messages,
                max_tokens=max_tokens,
                stream=True,
                temperature=temperature,
                top_p=top_p,
            ):
                # Streamed deltas can be empty or None (e.g. the final chunk)
                token = chunk.choices[0].delta.content or ""
                response += token
                yield response

            # Generate audio after the complete response
            audio_path = self.text_to_speech(response)
            yield (response, audio_path)

        except Exception as e:
            logger.error(f"Error in generate_response: {e}")
            yield f"I'm sorry, I encountered an error: {str(e)}"


def create_demo() -> gr.Blocks:
    """Create and configure the Gradio interface."""
    immy = ImmyChatbot()

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(
            type="messages",  # Using the new messages format
            show_label=False
        )
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your message here...",
            show_label=False
        )
        audio_output = gr.Audio(
            label="Response Audio",
            autoplay=True
        )

        with gr.Accordion("Advanced Settings", open=False):
            system_message = gr.Textbox(
                value="You are Immy, a magical AI-powered teddy bear who loves chatting with children. "
                      "You are kind, funny, and full of wonder, always ready to tell stories, answer "
                      "questions, and offer friendly advice. Speak playfully and patiently, using simple, "
                      "child-friendly language to encourage curiosity, learning, and imagination.",
                label="System Message",
                lines=4
            )
            max_tokens = gr.Slider(
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
                label="Max New Tokens"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (Nucleus Sampling)"
            )

        def user(user_message: str, history: List[Dict[str, str]]) -> Tuple[str, List[Dict[str, str]]]:
            """Append the user message to the history and clear the textbox."""
            new_message = {"role": "user", "content": user_message}
            return "", history + [new_message]

        def bot(
            history: List[Dict[str, str]],
            system_message: str,
            max_tokens: int,
            temperature: float,
            top_p: float
        ) -> Generator[Tuple[List[Dict[str, str]], Optional[str]], None, None]:
            """Stream the bot response into the chat and attach audio at the end."""
            user_message = history[-1]["content"]
            prior_history = history[:-1]

            # Append a single assistant placeholder and update it in place,
            # so partial streamed responses don't pile up as separate messages
            history.append({"role": "assistant", "content": ""})

            for response in immy.generate_response(
                user_message,
                prior_history,
                system_message,
                max_tokens,
                temperature,
                top_p
            ):
                if isinstance(response, tuple):
                    # Final response with audio
                    history[-1]["content"] = response[0]
                    yield history, response[1]
                else:
                    # Intermediate response, no audio yet
                    history[-1]["content"] = response
                    yield history, None

        # Set up the message submission chain
        msg.submit(
            user,
            [msg, chatbot],
            [msg, chatbot],
            queue=False
        ).then(
            bot,
            [chatbot, system_message, max_tokens, temperature, top_p],
            [chatbot, audio_output]
        )

    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.launch()