import spaces
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import gradio as gr
from utils.chatbot_local import ChatBot
# Path handed to both loaders below: the model and its tokenizer are read
# from the same location.
MODEL_PATH = 'lora_adapter'

# Loader options: half-precision weights, with device placement delegated to
# the library via "auto".
_MODEL_LOAD_OPTIONS = {
    "torch_dtype": torch.float16,
    "device_map": "auto",
}

model = AutoPeftModelForCausalLM.from_pretrained(MODEL_PATH, **_MODEL_LOAD_OPTIONS)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Project-local wrapper (utils.chatbot_local) that the chat handler calls to
# generate replies from the model/tokenizer pair.
chatbot = ChatBot(tokenizer=tokenizer, model=model)
# Text shown inside the chat window before any message has been sent.
PLACEHOLDER = "\nHi! How can I help you today?\n"

# Custom stylesheet passed to gr.Blocks: a pill-shaped black "duplicate"
# button and centred h3 headings.
CSS = (
    "\n"
    ".duplicate-button {\n"
    "margin: auto !important;\n"
    "color: white !important;\n"
    "background: black !important;\n"
    "border-radius: 100vh !important;\n"
    "}\n"
    "h3 {\n"
    "text-align: center;\n"
    "}\n"
)
def _history_to_messages(history):
    """Flatten Gradio chat history into a list of role/content dicts.

    Two history layouts are accepted:

    * pair layout -- each turn is a ``(user_text, assistant_text)`` pair;
    * message layout -- each turn is already a dict carrying ``"role"`` and
      ``"content"`` keys (any additional keys are dropped).

    Turns or slots whose content is ``None`` (for example an initial
    assistant greeting that has no user half) are skipped instead of being
    forwarded as ``content=None``.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role is not None and content is not None:
                messages.append({"role": role, "content": content})
            continue
        prompt, answer = turn
        if prompt is not None:
            messages.append({"role": "user", "content": prompt})
        if answer is not None:
            messages.append({"role": "assistant", "content": answer})
    return messages


@spaces.GPU()
def stream_chat(
    message: str,
    history: list,
):
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The user's newest message.
        history: Earlier turns as supplied by the chat UI, either as
            ``(user, assistant)`` pairs or as role/content dicts.

    Yields:
        The reply accumulated so far, once per chunk produced by
        ``chatbot.chat(..., stream=True)``, so the UI can re-render the
        partial answer as it grows.
    """
    print(f'message: {message}')
    print(f'history: {history}')
    # Fixed system prompt, then prior turns, then the new user message.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        *_history_to_messages(history),
        {"role": "user", "content": message},
    ]
    buffer = ""
    for token in chatbot.chat(messages=conversation, stream=True):
        buffer += token
        yield buffer
# Chat display component, built up front and handed to ChatInterface below.
gr_chatbot = gr.Chatbot(placeholder=PLACEHOLDER, height=600)

# Top-level app: a single chat interface driven by the streaming handler.
with gr.Blocks(theme="soft", css=CSS) as demo:
    gr.ChatInterface(fn=stream_chat, fill_height=True, chatbot=gr_chatbot)

if __name__ == "__main__":
    # Serve on 0.0.0.0 without a public share link, with debug output enabled.
    demo.launch(debug=True, share=False, server_name="0.0.0.0")