import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, pipeline
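# NOTE: loading a GPTQ-quantized checkpoint through transformers generally requires the
# optimum and auto-gptq packages to be installed in the environment as well.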
# Define the repository where your model is saved
model_repo = "Dumele/viv-updated2" # Replace with your actual repository
# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)
# Define the configuration with `disable_exllama` set to True
quantization_config = GPTQConfig(bits=4, disable_exllama=True)
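# disable_exllama=True falls back to the standard GPTQ kernels, which avoids exllama-kernel
# errors on setups where those kernels are not supported.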
# Load the model with the custom configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)
# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
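# On a CPU-only machine the quantized model stays on CPU; 4-bit GPTQ inference there is likely
# to be very slow, if the installed auto-gptq build supports it at all.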
# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
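# device=0 places generation on the first CUDA device; device=-1 keeps the pipeline on CPU.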
# Define a prompt
prompt = "###Human: Answer this question: What exactly does Viv do?\n###Assistant:"
# Generate text
generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
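# max_length counts prompt tokens plus generated tokens; num_return_sequences=1 returns a single completion.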
# Print the generated text
print(generated_text[0]['generated_text'])
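# Generation callback for the Gradio UI.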
def generate_response(prompt):
    generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
    return generated_text[0]['generated_text']
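# Note: the user's text is passed to the model as-is; wrapping it in the same ###Human:/###Assistant:
# template used above may match the fine-tuning format more closely.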
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="Chat with VivBeta",
    description="Enter a prompt to interact with the fine-tuned model."
)
iface.launch()
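# launch() starts the Gradio server; on Hugging Face Spaces this is what serves the app, and
# locally it defaults to http://127.0.0.1:7860.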