import gradio as gr
from huggingface_hub import login
import torch
# from datasets import Dataset
# from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
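# NOTE: the repository used below is public, so no authentication is needed here.
# If it were private or gated, you would have to log in first. A minimal sketch
# (the token value is a placeholder, not part of the original script):
# login(token="hf_...")  # or run `huggingface-cli login` beforehand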
# Define the repository where your model is saved
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository

# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# Define the GPTQ configuration with `disable_exllama` set to True, so the
# 4-bit model loads without requiring the exllama CUDA kernels
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the model with the custom quantization configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
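# Optional sanity check (illustrative, not part of the original script): report
# the quantized model's approximate memory footprint in GB.
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")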
from transformers import pipeline

# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

# Define a prompt
prompt = "###Human: Answer this question: What exactly does Viv do?\n###Assistant:"

# Generate text
generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)

# Print the generated text
print(generated_text[0]['generated_text'])
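# The pipeline output includes the prompt itself. As an illustrative
# post-processing step (not part of the original script), split on the
# "###Assistant:" marker to keep only the model's reply:
assistant_reply = generated_text[0]['generated_text'].split("###Assistant:")[-1].strip()
print(assistant_reply)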
# pip install gradio
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, pipeline
import gradio as gr
# Define the repository where your model is saved
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository name

# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# Define the configuration with `disable_exllama` set to True
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the model with the custom configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
def generate_response(prompt):
    # Generate a completion for the user's prompt and return the full text
    generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
    return generated_text[0]['generated_text']
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="Chat with VivBeta",
    description="Enter a prompt to interact with the fine-tuned model."
)

iface.launch()
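# On Hugging Face Spaces, launch() with no arguments is sufficient. When running
# locally, a temporary public link can be created instead, e.g.:
# iface.launch(share=True)  # illustrative option, not part of the original script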