OpeoluwaAdekoya committed
Commit
11b9c39
1 Parent(s): a65a1fc

Update app.py

Files changed (1)
  1. app.py +32 -210
app.py CHANGED
@@ -1,211 +1,33 @@
  import gradio as gr
- import os
- import spaces
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
- from threading import Thread
- # Set an environment variable
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
- DESCRIPTION = '''
- <div>
- <h1 style="text-align: center;">Fine-Tuned Viv Model</h1>
- <p>This Space demonstrates the instruction-tuned model <a href="https://huggingface.co/Dumele/viv-updated"><b>Dumele/viv-updated</b></a>. Feel free to play with it, or duplicate to run privately!</p>
- <p>:mag_right: For more details about the release and how to use the model with <code>transformers</code>, visit the model-card linked above.</p>
- </div>
- '''
- PLACEHOLDER = """
- <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
- <p style="font-size: 20px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
- </div>
- """
- css = """
- h1 {
-   text-align: center;
-   display: block;
- }
- #duplicate-button {
-   margin: auto;
-   color: white;
-   background: #1565C0;
-   border-radius: 100vh;
- }
- """
- # Load the tokenizer and model
- tokenizer = AutoTokenizer.from_pretrained("Dumele/viv-updated")
- model = AutoModelForCausalLM.from_pretrained("Dumele/viv-updated", device_map="auto")
- terminators = [
-     tokenizer.eos_token_id,
-     tokenizer.convert_tokens_to_ids("")
- ]
- @spaces.GPU(duration=120)
- def chat_viv_updated(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
-     """
-     Generate a streaming response using the fine-tuned model.
-     Args:
-         message (str): The input message.
-         history (list): The conversation history used by ChatInterface.
-         temperature (float): The temperature for generating the response.
-         max_new_tokens (int): The maximum number of new tokens to generate.
-     Returns:
-         str: The generated response.
-     """
-     conversation = []
-     for user, assistant in history:
-         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-     conversation.append({"role": "user", "content": message})
-     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
-     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-     generate_kwargs = dict(
-         input_ids=input_ids,
-         streamer=streamer,
-         max_new_tokens=max_new_tokens,
-         do_sample=True,
-         temperature=temperature,
-         eos_token_id=terminators,
-     )
-     if temperature == 0:
-         generate_kwargs['do_sample'] = False
-     t = Thread(target=model.generate, kwargs=generate_kwargs)
-     t.start()
-     outputs = []
-     for text in streamer:
-         outputs.append(text)
-         yield "".join(outputs)
- # Gradio block
- chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
- with gr.Blocks(fill_height=True, css=css) as demo:
-     gr.Markdown(DESCRIPTION)
-     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
-     gr.ChatInterface(
-         fn=chat_viv_updated,
-         chatbot=chatbot,
-         fill_height=True,
-         additional_inputs_accordion=gr.Accordion(label=":gear: Parameters", open=False, render=False),
-         additional_inputs=[
-             gr.Slider(minimum=0,
-                       maximum=1,
-                       step=0.1,
-                       value=0.95,
-                       label="Temperature",
-                       render=False),
-             gr.Slider(minimum=128,
-                       maximum=4096,
-                       step=1,
-                       value=512,
-                       label="Max new tokens",
-                       render=False),
-         ],
-         examples=[
-             ['How to setup a human base on Mars? Give a short answer.'],
-             ['Explain the theory of relativity to me like I’m 8 years old.'],
-             ['What is 9,000 * 9,000?'],
-             ['Write a pun-filled happy birthday message to my friend Alex.'],
-             ['Justify why a penguin might make a good king of the jungle.']
-         ],
-         cache_examples=False,
-     )
- if __name__ == "__main__":
-     demo.launch()
-
- import gradio as gr
- import os
- import spaces
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
- from threading import Thread
- # Set an environment variable
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
- DESCRIPTION = '''
- <div>
- <h1 style="text-align: center;">Fine-Tuned Viv Model</h1>
- <p>This Space demonstrates the instruction-tuned model <a href="https://huggingface.co/Dumele/viv-updated"><b>Dumele/viv-updated</b></a>. Feel free to play with it, or duplicate to run privately!</p>
- <p>:mag_right: For more details about the release and how to use the model with <code>transformers</code>, visit the model-card linked above.</p>
- </div>
- '''
- PLACEHOLDER = """
- <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
- <p style="font-size: 20px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
- </div>
- """
- css = """
- h1 {
-   text-align: center;
-   display: block;
- }
- #duplicate-button {
-   margin: auto;
-   color: white;
-   background: #1565C0;
-   border-radius: 100vh;
- }
- """
- # Load the tokenizer and model
- tokenizer = AutoTokenizer.from_pretrained("Dumele/viv-updated")
- model = AutoModelForCausalLM.from_pretrained("Dumele/viv-updated", device_map="auto")
- terminators = [
-     tokenizer.eos_token_id,
-     tokenizer.convert_tokens_to_ids("")
- ]
- @spaces.GPU(duration=120)
- def chat_viv_updated(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
-     """
-     Generate a streaming response using the fine-tuned model.
-     Args:
-         message (str): The input message.
-         history (list): The conversation history used by ChatInterface.
-         temperature (float): The temperature for generating the response.
-         max_new_tokens (int): The maximum number of new tokens to generate.
-     Returns:
-         str: The generated response.
-     """
-     conversation = []
-     for user, assistant in history:
-         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-     conversation.append({"role": "user", "content": message})
-     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
-     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-     generate_kwargs = dict(
-         input_ids=input_ids,
-         streamer=streamer,
-         max_new_tokens=max_new_tokens,
-         do_sample=True,
-         temperature=temperature,
-         eos_token_id=terminators,
-     )
-     if temperature == 0:
-         generate_kwargs['do_sample'] = False
-     t = Thread(target=model.generate, kwargs=generate_kwargs)
-     t.start()
-     outputs = []
-     for text in streamer:
-         outputs.append(text)
-         yield "".join(outputs)
- # Gradio block
- chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
- with gr.Blocks(fill_height=True, css=css) as demo:
-     gr.Markdown(DESCRIPTION)
-     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
-     gr.ChatInterface(
-         fn=chat_viv_updated,
-         chatbot=chatbot,
-         fill_height=True,
-         additional_inputs_accordion=gr.Accordion(label=":gear: Parameters", open=False, render=False),
-         additional_inputs=[
-             gr.Slider(minimum=0,
-                       maximum=1,
-                       step=0.1,
-                       value=0.95,
-                       label="Temperature",
-                       render=False),
-             gr.Slider(minimum=128,
-                       maximum=4096,
-                       step=1,
-                       value=512,
-                       label="Max new tokens",
-                       render=False),
-         ],
-         examples=[
-             ['Who is Viv?'],
-         ],
-         cache_examples=False,
-     )
- if __name__ == "__main__":
-     demo.launch()
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, pipeline
+ import torch
+ # Repository where the fine-tuned model is saved
+ model_repo = "Dumele/viv-updated"
+ # Load the tokenizer from the repository
+ tokenizer = AutoTokenizer.from_pretrained(model_repo)
+ # Define the quantization configuration with `disable_exllama` set to True
+ quantization_config = GPTQConfig(bits=4, disable_exllama=True)
+ # Load the model with the custom configuration
+ model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)
+ # Move the model to GPU if available
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+ # Create a text-generation pipeline
+ text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
+ def chat_with_model(prompt):
+     # Format the prompt the way the model was fine-tuned
+     formatted_prompt = f"###Human: Answer this question: {prompt}\n###Assistant:"
+     # Generate text
+     generated_text = text_generator(formatted_prompt, max_length=100, num_return_sequences=1)
+     # Return only the generated answer, stripping the echoed prompt
+     return generated_text[0]['generated_text'].replace(formatted_prompt, '').strip()
+ # Create the Gradio interface
+ iface = gr.Interface(
+     fn=chat_with_model,
+     inputs="text",
+     outputs="text",
+     title="Mistral 7B Chatbot",
+     description="A chatbot powered by the Mistral 7B model fine-tuned on a custom dataset."
+ )
+ # Launch the interface
+ iface.launch()
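
A note on the generation call above: `max_length=100` counts the prompt tokens as well as the completion, so a long question leaves little room for the answer, and the `.replace(formatted_prompt, '')` cleanup relies on the pipeline echoing the prompt verbatim. Below is a minimal variant sketch, assuming it runs in the same script as the `text_generator` defined above; `max_new_tokens` and `return_full_text` are standard `transformers` text-generation pipeline options, not part of this commit.

def chat_with_model(prompt):
    # Same prompt format the commit uses
    formatted_prompt = f"###Human: Answer this question: {prompt}\n###Assistant:"
    # return_full_text=False returns only the generated continuation, so no
    # manual prompt-stripping is needed; max_new_tokens bounds the reply
    # length independently of the prompt length (unlike max_length).
    result = text_generator(
        formatted_prompt,
        max_new_tokens=100,
        num_return_sequences=1,
        return_full_text=False,
    )
    return result[0]["generated_text"].strip()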