Spaces: Running on T4
Update app.py
Browse files
app.py
CHANGED
@@ -86,7 +86,8 @@ def evaluate(
|
|
86 |
|
87 |
gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
|
88 |
print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
|
89 |
-
|
|
|
90 |
gc.collect()
|
91 |
torch.cuda.empty_cache()
|
92 |
yield out_str.strip()
|
@@ -243,7 +244,7 @@ def chat(
|
|
243 |
with gr.Blocks(title=title) as demo:
|
244 |
gr.HTML(f"<div style=\"text-align: center;\">\n<h1>🐦Raven - {title}</h1>\n</div>")
|
245 |
with gr.Tab("Instruct mode"):
|
246 |
-
gr.Markdown(f"Raven is [RWKV 14B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) finetuned to follow instructions. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}. Finetuned on alpaca, gpt4all, codealpaca and more. For best results, *** keep you prompt short and clear ***. <b>UPDATE: now with Chat (see above, as a tab)</b>.")
|
247 |
with gr.Row():
|
248 |
with gr.Column():
|
249 |
instruction = gr.Textbox(lines=2, label="Instruction", value="Tell me about ravens.")
|
@@ -263,42 +264,42 @@ with gr.Blocks(title=title) as demo:
|
|
263 |
clear.click(lambda: None, [], [output])
|
264 |
data.click(lambda x: x, [data], [instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty])
|
265 |
|
266 |
-
with gr.Tab("Chat (Experimental - Might be buggy - use ChatRWKV for reference)"):
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
|
303 |
demo.queue(concurrency_count=1, max_size=10)
|
304 |
demo.launch(share=False)
|
|
|
86 |
|
87 |
gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
|
88 |
print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
|
89 |
+
del out
|
90 |
+
del state
|
91 |
gc.collect()
|
92 |
torch.cuda.empty_cache()
|
93 |
yield out_str.strip()
|
|
|
244 |
with gr.Blocks(title=title) as demo:
|
245 |
gr.HTML(f"<div style=\"text-align: center;\">\n<h1>🐦Raven - {title}</h1>\n</div>")
|
246 |
with gr.Tab("Instruct mode"):
|
247 |
+
gr.Markdown(f"Raven is [RWKV 14B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) finetuned to follow instructions. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}. Finetuned on alpaca, gpt4all, codealpaca and more. For best results, *** keep you prompt short and clear ***. <b>UPDATE: now with Chat (see above, as a tab) ==> turn off as of now due to VRAM leak caused by buggy code.</b>.")
|
248 |
with gr.Row():
|
249 |
with gr.Column():
|
250 |
instruction = gr.Textbox(lines=2, label="Instruction", value="Tell me about ravens.")
|
|
|
264 |
clear.click(lambda: None, [], [output])
|
265 |
data.click(lambda x: x, [data], [instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty])
|
266 |
|
267 |
+
# with gr.Tab("Chat (Experimental - Might be buggy - use ChatRWKV for reference)"):
|
268 |
+
# gr.Markdown(f'''<b>*** The length of response is restricted in this demo. Use ChatRWKV for longer generations. ***</b> Say "go on" or "continue" can sometimes continue the response. If you'd like to edit the scenario, make sure to follow the exact same format: empty lines between (and only between) different speakers. Changes only take effect after you press [Clear]. <b>The default "Bob" & "Alice" names work the best.</b>''', label="Description")
|
269 |
+
# with gr.Row():
|
270 |
+
# with gr.Column():
|
271 |
+
# chatbot = gr.Chatbot()
|
272 |
+
# state = gr.State()
|
273 |
+
# message = gr.Textbox(label="Message", value="Write me a python code to land on moon.")
|
274 |
+
# with gr.Row():
|
275 |
+
# send = gr.Button("Send", variant="primary")
|
276 |
+
# alt = gr.Button("Alternative", variant="secondary")
|
277 |
+
# clear = gr.Button("Clear", variant="secondary")
|
278 |
+
# with gr.Column():
|
279 |
+
# with gr.Row():
|
280 |
+
# user_name = gr.Textbox(lines=1, max_lines=1, label="User Name", value="Bob")
|
281 |
+
# bot_name = gr.Textbox(lines=1, max_lines=1, label="Bot Name", value="Alice")
|
282 |
+
# prompt = gr.Textbox(lines=10, max_lines=50, label="Scenario", value=chat_intro)
|
283 |
+
# temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.2)
|
284 |
+
# top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.5)
|
285 |
+
# presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0.4)
|
286 |
+
# count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=0.4)
|
287 |
+
# chat_inputs = [
|
288 |
+
# prompt,
|
289 |
+
# user_name,
|
290 |
+
# bot_name,
|
291 |
+
# chatbot,
|
292 |
+
# state,
|
293 |
+
# temperature,
|
294 |
+
# top_p,
|
295 |
+
# presence_penalty,
|
296 |
+
# count_penalty
|
297 |
+
# ]
|
298 |
+
# chat_outputs = [chatbot, state]
|
299 |
+
# message.submit(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
|
300 |
+
# send.click(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
|
301 |
+
# alt.click(alternative, [chatbot, state], [chatbot, state], queue=False).then(chat, chat_inputs, chat_outputs)
|
302 |
+
# clear.click(lambda: ([], None, ""), [], [chatbot, state, message], queue=False)
|
303 |
|
304 |
demo.queue(concurrency_count=1, max_size=10)
|
305 |
demo.launch(share=False)
|