# NOTE(review): the lines "Spaces:" / "Runtime error" / "Runtime error" were a
# scraped Hugging Face Spaces page header, not Python source — kept here as a
# comment so the file parses.
import spaces  # required for Hugging Face ZeroGPU Spaces, even if unused directly
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import gradio as gr

from utils.chatbot_local import ChatBot

# Path to the LoRA adapter weights. AutoPeftModelForCausalLM reads the base
# model name from the adapter config, loads it, and applies the adapter on top.
MODEL_PATH = 'lora_adapter'

model = AutoPeftModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float16,  # half precision to fit GPU memory
    device_map="auto",          # let accelerate place layers on available devices
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Project-local wrapper that handles prompt templating and streaming generation.
chatbot = ChatBot(model=model, tokenizer=tokenizer)
# HTML shown in the empty chat window before the first message is sent.
PLACEHOLDER = """
<center>
<p>Hi! How can I help you today?</p>
</center>
"""

# Custom CSS injected into the Gradio Blocks page (styles the duplicate-space
# button and centers h3 headings).
CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
}
"""
def stream_chat(
    message: str,
    history: list,
):
    """Stream the model's reply to *message* given the prior chat *history*.

    Gradio's ChatInterface calls this with the new user message and the
    previous (user, assistant) turn pairs. Yielding the growing buffer —
    rather than each token delta — makes the reply render incrementally
    in the UI.

    Args:
        message: The latest user message.
        history: Prior turns as (prompt, answer) pairs.

    Yields:
        The assistant reply accumulated so far, one yield per token.
    """
    print(f'message: {message}')
    print(f'history: {history}')

    # Rebuild the full conversation as OpenAI-style role/content messages.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."}
    ]
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})

    # Accumulate tokens so each yield is the reply-so-far, not a delta.
    buffer = ""
    for token in chatbot.chat(messages=conversation, stream=True):
        buffer += token
        yield buffer
# Chat display widget; the placeholder HTML fills the empty history view.
gr_chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)

# Wire the streaming handler into a ChatInterface inside a Blocks page so the
# custom CSS and theme apply.
with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=gr_chatbot,
        fill_height=True,
    )
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the
    # container (standard for Hugging Face Spaces); debug=True surfaces
    # tracebacks in the server log.
    demo.launch(
        server_name="0.0.0.0",
        share=False,
        debug=True,
    )