import gradio as gr
from huggingface_hub import login
# !pip install gradio accelerate peft bitsandbytes git+https://github.com/huggingface/transformers trl py7zr auto-gptq optimum
import torch
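# The `login` import above is otherwise unused. If the "Dumele/viv-updated2"
# repository is private, the app needs to authenticate before downloading it.
# A minimal sketch, assuming the token is exposed via an HF_TOKEN environment
# variable (that variable name is an assumption, not part of the original script):
import os

hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)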
# from datasets import Dataset
# from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
# from trl import SFTTrainer
# import pandas as pd
# import json
# def load_data_to_dataframe(json_file_path):
#     """
#     Load data from a JSON file and create a DataFrame with questions and answers.
#
#     Args:
#         json_file_path (str): Path to the JSON file.
#
#     Returns:
#         pd.DataFrame: DataFrame containing the questions and answers.
#     """
#     questions = []
#     answers = []
#
#     with open(json_file_path, 'r') as f:
#         data = json.load(f)
#
#     for entry in data:
#         for message in entry["messages"]:
#             if message["role"] == "user":
#                 questions.append(message["content"])
#             elif message["role"] == "assistant":
#                 answers.append(message["content"])
#
#     # Create DataFrame
#     df = pd.DataFrame({
#         'question': questions,
#         'answer': answers
#     })
#     return df
#
# def finetune_mistral_7b():
#     # Replace 'your_token' with your actual Hugging Face token
#     json_file_path = 'Dataset for finetuning Viv.json'
#     df = load_data_to_dataframe(json_file_path)
#     df["text"] = df[["question", "answer"]].apply(
#         lambda x: "###Human: Answer this question: " + x["question"] + "\n###Assistant: " + x["answer"],
#         axis=1
#     )
#     print(df.iloc[0])
#     data = Dataset.from_pandas(df)
#
#     tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
#     tokenizer.pad_token = tokenizer.eos_token
#     quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True, tokenizer=tokenizer)
#     model = AutoModelForCausalLM.from_pretrained(
#         "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
#         quantization_config=quantization_config_loading,
#         device_map="auto"
#     )
#     print(model)
#
#     model.config.use_cache = False
#     model.config.pretraining_tp = 1
#     model.gradient_checkpointing_enable()
#     model = prepare_model_for_kbit_training(model)
#     peft_config = LoraConfig(
#         r=16, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"]
#     )
#     model = get_peft_model(model, peft_config)
#
#     training_arguments = TrainingArguments(
#         output_dir="mistral-finetuned-Viv",
#         per_device_train_batch_size=8,
#         gradient_accumulation_steps=1,
#         optim="paged_adamw_32bit",
#         learning_rate=2e-4,
#         lr_scheduler_type="cosine",
#         save_strategy="epoch",
#         logging_steps=100,
#         num_train_epochs=1,
#         max_steps=100,
#         fp16=True,
#         push_to_hub=True,
#         hub_model_id="Dumele/viv-updated2",  # Specify the repository name
#         hub_strategy="every_save"
#     )
#
#     trainer = SFTTrainer(
#         model=model,
#         train_dataset=data,
#         peft_config=peft_config,
#         dataset_text_field="text",
#         args=training_arguments,
#         tokenizer=tokenizer,
#         packing=False,
#         max_seq_length=512
#     )
#     trainer.train()
#     trainer.push_to_hub()
#
# if __name__ == "__main__":
#     finetune_mistral_7b()
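# If the repository pushed above ends up holding only the LoRA adapter (which is
# what pushing from an SFTTrainer + PEFT setup commonly produces), the adapter can
# also be loaded back together with its base model via peft. A hedged, commented-out
# sketch, not used by the running app below:
# from peft import AutoPeftModelForCausalLM
# peft_model = AutoPeftModelForCausalLM.from_pretrained("Dumele/viv-updated2", device_map="auto")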
# Define the repository where your model is saved
model_repo = "Dumele/viv-updated2" # Replace with your actual repository
# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)
# Define the configuration with `disable_exllama` set to True
quantization_config = GPTQConfig(bits=4, disable_exllama=True)
# Load the model with the custom configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)
# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
from transformers import pipeline
# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
# Define a prompt
prompt = "###Human: Answer this question: What exactly does Viv do?\n###Assistant:"
# Generate text
generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
# Print the generated text
print(generated_text[0]['generated_text'])
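# By default the pipeline echoes the prompt in its output, so the assistant's reply
# is whatever follows the "###Assistant:" marker. A small helper sketch; the function
# name is ours, not part of the original script:
def extract_reply(full_text, marker="###Assistant:"):
    # Keep only the text after the last assistant marker, if present.
    return full_text.split(marker)[-1].strip()

print(extract_reply(generated_text[0]['generated_text']))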
# Reuse the model, tokenizer, and text-generation pipeline created above for the
# Gradio app below, rather than loading the same model a second time.
def generate_response(prompt):
    generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
    return generated_text[0]['generated_text']
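# Variant sketch (not wired into the interface below): wrap the raw Gradio input in
# the same "###Human: ... ###Assistant:" template used during fine-tuning, cap only
# the newly generated tokens, and return just the reply. The helper name and the
# max_new_tokens value are our assumptions, not taken from the original script:
def generate_templated_response(user_question):
    prompt = f"###Human: Answer this question: {user_question}\n###Assistant:"
    out = text_generator(prompt, max_new_tokens=100, num_return_sequences=1)
    # The pipeline returns the prompt plus the completion; keep only the reply.
    return out[0]["generated_text"].split("###Assistant:")[-1].strip()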
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="Chat with VivBeta",
    description="Enter a prompt to interact with the fine-tuned model."
)
iface.launch()