import gradio as gr
from huggingface_hub import login
# !pip install accelerate peft bitsandbytes git+https://github.com/huggingface/transformers trl py7zr auto-gptq optimum
import torch
# from datasets import Dataset
# from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
# from trl import SFTTrainer
# import pandas as pd
# import json
# def load_data_to_dataframe(json_file_path):
#     """
#     Load data from a JSON file and create a DataFrame with questions and answers.
#
#     Args:
#         json_file_path (str): Path to the JSON file.
#
#     Returns:
#         pd.DataFrame: DataFrame containing the questions and answers.
#     """
#     questions = []
#     answers = []
#     with open(json_file_path, 'r') as f:
#         data = json.load(f)
#     for entry in data:
#         for message in entry["messages"]:
#             if message["role"] == "user":
#                 questions.append(message["content"])
#             elif message["role"] == "assistant":
#                 answers.append(message["content"])
#     # Create DataFrame
#     df = pd.DataFrame({
#         'question': questions,
#         'answer': answers
#     })
#     return df
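# # For reference, the loader above assumes a JSON file shaped like this
# # (hypothetical sample; the field names come from the code, not the dataset):
# # [
# #   {"messages": [
# #     {"role": "user", "content": "What exactly does Viv do?"},
# #     {"role": "assistant", "content": "..."}
# #   ]}
# # ]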
# def finetune_mistral_7b():
#     # Path to the local JSON dataset used for fine-tuning
#     json_file_path = 'Dataset for finetuning Viv.json'
#     df = load_data_to_dataframe(json_file_path)
#     df["text"] = df[["question", "answer"]].apply(lambda x: "###Human: Answer this question: " + x["question"] + "\n###Assistant: " + x["answer"], axis=1)
#     print(df.iloc[0])
#     data = Dataset.from_pandas(df)
#     tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
#     tokenizer.pad_token = tokenizer.eos_token
#     quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True, tokenizer=tokenizer)
#     model = AutoModelForCausalLM.from_pretrained(
#         "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
#         quantization_config=quantization_config_loading,
#         device_map="auto"
#     )
#     print(model)
#     model.config.use_cache = False
#     model.config.pretraining_tp = 1
#     model.gradient_checkpointing_enable()
#     model = prepare_model_for_kbit_training(model)
#     peft_config = LoraConfig(
#         r=16, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"]
#     )
#     model = get_peft_model(model, peft_config)
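#     # Optional sanity check (a suggestion, not in the original script): PEFT
#     # models expose print_trainable_parameters(), which reports how small a
#     # fraction of the weights the LoRA adapters actually train.
#     # model.print_trainable_parameters()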
#     training_arguments = TrainingArguments(
#         output_dir="mistral-finetuned-Viv",
#         per_device_train_batch_size=8,
#         gradient_accumulation_steps=1,
#         optim="paged_adamw_32bit",
#         learning_rate=2e-4,
#         lr_scheduler_type="cosine",
#         save_strategy="epoch",
#         logging_steps=100,
#         num_train_epochs=1,
#         max_steps=100,
#         fp16=True,
#         push_to_hub=True,
#         hub_model_id="Dumele/viv-updated2",  # Repository to push checkpoints to
#         hub_strategy="every_save"
#     )
#     trainer = SFTTrainer(
#         model=model,
#         train_dataset=data,
#         peft_config=peft_config,
#         dataset_text_field="text",
#         args=training_arguments,
#         tokenizer=tokenizer,
#         packing=False,
#         max_seq_length=512
#     )
#     trainer.train()
#     trainer.push_to_hub()
#
# if __name__ == "__main__":
#     finetune_mistral_7b()
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
import torch

# Define the repository where the fine-tuned model is saved
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository

# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# Define the configuration with `disable_exllama` set to True
# (newer transformers releases deprecate this flag in favor of use_exllama=False)
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the model with the custom configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
from transformers import pipeline

# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

# Define a prompt in the same "###Human: ... ###Assistant:" format used during fine-tuning
prompt = "###Human: Answer this question: What exactly does Viv do?\n###Assistant:"

# Generate text
generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)

# Print the generated text
print(generated_text[0]['generated_text'])
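# A minimal alternative to the pipeline (a sketch using the model, tokenizer,
# and device already defined above): tokenize the prompt, call generate(),
# and decode the output ids.
# inputs = tokenizer(prompt, return_tensors="pt").to(device)
# with torch.no_grad():
#     output_ids = model.generate(**inputs, max_new_tokens=100)
# print(tokenizer.decode(output_ids[0], skip_special_tokens=True))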
# pip install gradio
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, pipeline
import gradio as gr

# Define the repository where the fine-tuned model is saved
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository name

# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# Define the configuration with `disable_exllama` set to True
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the model with the custom configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
def generate_response(prompt):
    generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
    return generated_text[0]['generated_text']
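# Note: the pipeline output echoes the prompt before the completion; if only
# the model's answer is wanted, one could strip the prefix, e.g.
# generated_text[0]['generated_text'][len(prompt):].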
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="Chat with VivBeta",
    description="Enter a prompt to interact with the fine-tuned model."
)

iface.launch()
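# On a hosted Space, launch() serves the app directly; when running locally,
# iface.launch(share=True) can be used to get a temporary public link.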