File size: 2,816 Bytes
abaea54
 
c9c4d75
 
 
9c2ae81
 
c9c4d75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4b60e9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr

from huggingface_hub import login

import torch
# from datasets import Dataset
# from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
# Single consolidated transformers import (the original imported these names
# twice, and torch twice); TrainingArguments is kept for compatibility even
# though this script does not train.
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments

# Hugging Face Hub repository holding the fine-tuned model and tokenizer.
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository

# Load the tokenizer that was saved alongside the model.
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# GPTQ 4-bit quantization config; disable_exllama=True avoids requiring the
# exllama CUDA kernels, so the quantized model can load on more environments.
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the quantized causal-LM weights with the config above.
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Place the model on GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)




from transformers import pipeline

# Build a text-generation pipeline; device index 0 selects the GPU when
# CUDA is available, -1 keeps the pipeline on CPU.
pipeline_device = 0 if torch.cuda.is_available() else -1
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=pipeline_device)

# Smoke-test prompt in the ###Human / ###Assistant format the model expects.
prompt = "###Human: Answer this question: What exactly does Viv do?\n###Assistant:"

# Produce a single completion capped at 100 tokens and show it.
completions = text_generator(prompt, max_length=100, num_return_sequences=1)
print(completions[0]['generated_text'])

# pip install gradio

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# NOTE(review): this section repeats the setup above verbatim — it re-imports,
# re-loads the tokenizer and quantized model, and rebuilds the pipeline.
# Presumably a second notebook cell kept self-contained; confirm whether the
# duplicate model load (slow, doubles memory use) is intentional.

# Define the repository where your model is saved
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository name

# Load the tokenizer from the repository
tokenizer = AutoTokenizer.from_pretrained(model_repo)

# Define the configuration with `disable_exllama` set to True
# (relies on GPTQConfig imported earlier in the file; it is not re-imported here)
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the model with the custom configuration
model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create a text generation pipeline (device 0 = first GPU, -1 = CPU)
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

def generate_response(prompt):
    """Return the model's completion for *prompt* via the shared pipeline.

    Generation is capped at 100 tokens and yields a single sequence; the
    pipeline returns the prompt plus continuation as 'generated_text'.
    """
    completions = text_generator(prompt, max_length=100, num_return_sequences=1)
    return completions[0]['generated_text']

# Expose the generator through a minimal Gradio web UI.
prompt_input = gr.Textbox(lines=2, placeholder="Enter your prompt here...")

iface = gr.Interface(
    fn=generate_response,
    inputs=prompt_input,
    outputs="text",
    title="Chat with VivBeta",
    description="Enter a prompt to interact with the fine-tuned model.",
)

# Start the local Gradio server (blocks until stopped).
iface.launch()