"""Gradio app that streams Urdu chatbot replies from a Hugging Face
Inference Endpoint.

The endpoint URL and auth token are read from the environment
(``url`` / ``hf_token``), e.g. Space secrets.
"""

import os
import random  # NOTE(review): unused in this file — kept in case another code path relies on it
import time    # NOTE(review): unused in this file — kept in case another code path relies on it

import gradio as gr
from huggingface_hub import InferenceClient

# os.getenv returns None when a variable is unset; InferenceClient then
# falls back to its default model/endpoint resolution.
endpoint_url = os.getenv('url')
hf_token = os.getenv('hf_token')

# Streaming Client
client = InferenceClient(endpoint_url, token=hf_token)

# Generation parameters forwarded to text_generation on every request.
gen_kwargs = dict(
    max_new_tokens=1024,
    top_k=30,
    top_p=0.9,
    temperature=0.2,
    repetition_penalty=1.05,  # 1.02
    # NOTE(review): the trailing "" looks like a mangled special token
    # (e.g. "</s>") — confirm against the model's tokenizer config. An
    # empty stop string only matches an empty token client-side and may
    # be rejected server-side.
    stop=["\nUser:", "<|endoftext|>", ""],
)


def generate_text(prompt):
    """Generates text using the Hugging Face Inference API.

    Yields the accumulated response after each token so Gradio renders
    it as a live-updating stream.
    """
    # Alpaca-style instruction template; the instruction steers the
    # model to answer in Urdu regardless of the input language.
    # NOTE(review): the literal "" after "### Response:" looks like a
    # template leftover — verify it matches the fine-tuning format.
    chat_prompt = f"""
### Instruction: You are a chatbot. Chat in Urdu

### Input: {prompt}

### Response: ""
"""

    stream = client.text_generation(chat_prompt, stream=True, details=True, **gen_kwargs)

    generated_text = ""
    for r in stream:
        # Skip special tokens (BOS/EOS/padding) entirely.
        if r.token.special:
            continue
        # Client-side stop guard: the server-side `stop` sequences can
        # still leak the stop text into the stream, so cut off here too.
        if r.token.text in gen_kwargs["stop"]:
            break
        generated_text += r.token.text
        yield generated_text


iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="Urdu Chatbot",
    description="Ask me anything in Urdu!",
)

iface.launch()