import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("valeriojob/MedGPT-Llama3.1-8B-BA-v.1")
model = AutoModelForCausalLM.from_pretrained("valeriojob/MedGPT-Llama3.1-8B-BA-v.1")
model.eval()


def respond_to_query(user_input):
    # Tokenize the prompt; tokenizer() also returns the attention mask,
    # which generate() needs to behave correctly with padding.
    # (Do not append eos_token here -- that would signal end-of-sequence
    # before generation even starts.)
    inputs = tokenizer(user_input, return_tensors="pt")

    # Generate a response from the model
    with torch.no_grad():
        response_ids = model.generate(
            **inputs,
            max_new_tokens=150,  # reply budget, independent of prompt length
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, not the echoed prompt
    response = tokenizer.decode(
        response_ids[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True,
    )
    return response


# Create a Gradio interface
iface = gr.Interface(
    fn=respond_to_query,
    inputs="text",
    outputs="text",
    title="MedGPT Chatbot",
    description="Ask your medical questions to MedGPT!",
)

if __name__ == "__main__":
    iface.launch()
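
# --- Optional tweaks (sketches only, not verified against this model's card) ---
#
# 1) Memory: an 8B-parameter model in full fp32 needs roughly 32 GB. If a CUDA
#    GPU and the `accelerate` package are available, loading in half precision
#    roughly halves that:
#
#    model = AutoModelForCausalLM.from_pretrained(
#        "valeriojob/MedGPT-Llama3.1-8B-BA-v.1",
#        torch_dtype=torch.float16,  # half precision; assumes GPU support
#        device_map="auto",          # let accelerate place layers on devices
#    )
#
# 2) Prompt format: if this fine-tune ships a chat template (many Llama 3.1
#    derivatives do -- check the model card), routing the input through it
#    usually produces better answers than a raw string:
#
#    messages = [{"role": "user", "content": user_input}]
#    input_ids = tokenizer.apply_chat_template(
#        messages, add_generation_prompt=True, return_tensors="pt"
#    )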