import gradio as gr
from huggingface_hub import login
import torch
# from datasets import Dataset
# from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
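# NOTE: the repository used below is public, so no authentication is needed here.
# If it were private or gated, you would have to log in first. A minimal sketch
# (the token value is a placeholder, not part of the original script):
# login(token="hf_...")  # or run `huggingface-cli login` beforehand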
# Define the repository where your model is saved
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository

# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# Define the GPTQ configuration with `disable_exllama` set to True, so the
# 4-bit model loads without requiring the exllama CUDA kernels
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the model with the custom quantization configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
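# Optional sanity check (illustrative, not part of the original script): report
# the quantized model's approximate memory footprint in GB.
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")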
from transformers import pipeline

# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

# Define a prompt
prompt = "###Human: Answer this question: What exactly does Viv do?\n###Assistant:"

# Generate text
generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)

# Print the generated text
print(generated_text[0]['generated_text'])
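# The pipeline output includes the prompt itself. As an illustrative
# post-processing step (not part of the original script), split on the
# "###Assistant:" marker to keep only the model's reply:
assistant_reply = generated_text[0]['generated_text'].split("###Assistant:")[-1].strip()
print(assistant_reply)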
# pip install gradio
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, pipeline
import gradio as gr
# Define the repository where your model is saved
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository name

# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# Define the configuration with `disable_exllama` set to True
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the model with the custom configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create a text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
def generate_response(prompt):
    # Generate a completion for the user's prompt and return the full text
    generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
    return generated_text[0]['generated_text']
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="Chat with VivBeta",
    description="Enter a prompt to interact with the fine-tuned model."
)

iface.launch()
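# On Hugging Face Spaces, launch() with no arguments is sufficient. When running
# locally, a temporary public link can be created instead, e.g.:
# iface.launch(share=True)  # illustrative option, not part of the original script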