import gradio as gr
from huggingface_hub import login
# !pip install accelerate peft bitsandbytes git+https://github.com/huggingface/transformers trl py7zr auto-gptq optimum
import torch
# from datasets import Dataset
# from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
# from trl import SFTTrainer
# import pandas as pd
# import json
# def load_data_to_dataframe(json_file_path):
#     """
#     Load data from a JSON file and create a DataFrame with questions and answers.
#
#     Args:
#         json_file_path (str): Path to the JSON file.
#
#     Returns:
#         pd.DataFrame: DataFrame containing the questions and answers.
#     """
#     questions = []
#     answers = []
#     with open(json_file_path, 'r') as f:
#         data = json.load(f)
#     for entry in data:
#         for message in entry["messages"]:
#             if message["role"] == "user":
#                 questions.append(message["content"])
#             elif message["role"] == "assistant":
#                 answers.append(message["content"])
#     # Create DataFrame
#     df = pd.DataFrame({
#         'question': questions,
#         'answer': answers
#     })
#     return df
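# # For reference, the loader above assumes a JSON file shaped like this
# # (hypothetical sample; the field names come from the code, not the dataset):
# # [
# #   {"messages": [
# #     {"role": "user", "content": "What exactly does Viv do?"},
# #     {"role": "assistant", "content": "..."}
# #   ]}
# # ]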
# def finetune_mistral_7b():
#     # Path to the local JSON dataset used for fine-tuning
#     json_file_path = 'Dataset for finetuning Viv.json'
#     df = load_data_to_dataframe(json_file_path)
#     df["text"] = df[["question", "answer"]].apply(lambda x: "###Human: Answer this question: " + x["question"] + "\n###Assistant: " + x["answer"], axis=1)
#     print(df.iloc[0])
#     data = Dataset.from_pandas(df)
#     tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
#     tokenizer.pad_token = tokenizer.eos_token
#     quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True, tokenizer=tokenizer)
#     model = AutoModelForCausalLM.from_pretrained(
#         "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
#         quantization_config=quantization_config_loading,
#         device_map="auto"
#     )
#     print(model)
#     model.config.use_cache = False
#     model.config.pretraining_tp = 1
#     model.gradient_checkpointing_enable()
#     model = prepare_model_for_kbit_training(model)
#     peft_config = LoraConfig(
#         r=16, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"]
#     )
#     model = get_peft_model(model, peft_config)
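#     # Optional sanity check (a suggestion, not in the original script): PEFT
#     # models expose print_trainable_parameters(), which reports how small a
#     # fraction of the weights the LoRA adapters actually train.
#     # model.print_trainable_parameters()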
#     training_arguments = TrainingArguments(
#         output_dir="mistral-finetuned-Viv",
#         per_device_train_batch_size=8,
#         gradient_accumulation_steps=1,
#         optim="paged_adamw_32bit",
#         learning_rate=2e-4,
#         lr_scheduler_type="cosine",
#         save_strategy="epoch",
#         logging_steps=100,
#         num_train_epochs=1,
#         max_steps=100,
#         fp16=True,
#         push_to_hub=True,
#         hub_model_id="Dumele/viv-updated2",  # Repository to push checkpoints to
#         hub_strategy="every_save"
#     )
#     trainer = SFTTrainer(
#         model=model,
#         train_dataset=data,
#         peft_config=peft_config,
#         dataset_text_field="text",
#         args=training_arguments,
#         tokenizer=tokenizer,
#         packing=False,
#         max_seq_length=512
#     )
#     trainer.train()
#     trainer.push_to_hub()
#
# if __name__ == "__main__":
#     finetune_mistral_7b()
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
import torch

# Define the repository where the fine-tuned model is saved
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository

# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# Define the configuration with `disable_exllama` set to True
# (newer transformers releases deprecate this flag in favor of use_exllama=False)
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the model with the custom configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
from transformers import pipeline

# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

# Define a prompt in the same "###Human: ... ###Assistant:" format used during fine-tuning
prompt = "###Human: Answer this question: What exactly does Viv do?\n###Assistant:"

# Generate text
generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)

# Print the generated text
print(generated_text[0]['generated_text'])
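# A minimal alternative to the pipeline (a sketch using the model, tokenizer,
# and device already defined above): tokenize the prompt, call generate(),
# and decode the output ids.
# inputs = tokenizer(prompt, return_tensors="pt").to(device)
# with torch.no_grad():
#     output_ids = model.generate(**inputs, max_new_tokens=100)
# print(tokenizer.decode(output_ids[0], skip_special_tokens=True))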
# pip install gradio
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, pipeline
import gradio as gr

# Define the repository where the fine-tuned model is saved
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository name

# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# Define the configuration with `disable_exllama` set to True
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the model with the custom configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
def generate_response(prompt):
    generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
    return generated_text[0]['generated_text']
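# Note: the pipeline output echoes the prompt before the completion; if only
# the model's answer is wanted, one could strip the prefix, e.g.
# generated_text[0]['generated_text'][len(prompt):].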
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="Chat with VivBeta",
    description="Enter a prompt to interact with the fine-tuned model."
)

iface.launch()
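# On a hosted Space, launch() serves the app directly; when running locally,
# iface.launch(share=True) can be used to get a temporary public link.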