# Hugging Face Space: Gradio chat demo serving a PEFT/LoRA fine-tuned causal LM.
import spaces
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import gradio as gr
from utils.chatbot_local import ChatBot
# Directory containing the LoRA adapter weights (PEFT format).
# AutoPeftModelForCausalLM resolves the base model from the adapter's config.
MODEL_PATH = 'lora_adapter'

# Load base model + LoRA adapter in fp16; device_map="auto" lets accelerate
# place layers across whatever devices are available.
model = AutoPeftModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float16,
    device_map="auto",
)
# NOTE(review): tokenizer is loaded from the adapter path — assumes tokenizer
# files were saved alongside the adapter; confirm, else point at the base model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Project-local wrapper providing the streaming .chat() API used below.
chatbot = ChatBot(model=model, tokenizer=tokenizer)
# HTML rendered inside the empty chatbot panel before the first message.
PLACEHOLDER = """
<center>
<p>Hi! How can I help you today?</p>
</center>
"""

# Custom CSS injected into the Blocks UI: styles the Space "duplicate"
# button and centers h3 headings.
CSS = """
.duplicate-button {
margin: auto !important;
color: white !important;
background: black !important;
border-radius: 100vh !important;
}
h3 {
text-align: center;
}
"""
@spaces.GPU()
def stream_chat(
    message: str,
    history: list,
):
    """Generate a streamed assistant reply for *message*.

    *history* is a list of (user, assistant) string pairs, as produced by the
    gradio Chatbot tuples format; it is replayed into the conversation before
    the new user turn. Yields the reply text accumulated so far, so the UI
    shows tokens as they arrive.
    """
    print(f'message: {message}')
    print(f'history: {history}')

    # Rebuild the full conversation: system prompt, prior turns, new message.
    conversation = [{"role": "system", "content": "You are a helpful assistant."}]
    for user_turn, assistant_turn in history:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})

    # Accumulate streamed tokens and yield the growing partial reply.
    partial = ""
    for chunk in chatbot.chat(messages=conversation, stream=True):
        partial = partial + chunk
        yield partial
# Chat display widget; PLACEHOLDER HTML shows while the history is empty.
gr_chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)

# Assemble the app: ChatInterface wires the input box / submit loop to
# stream_chat and renders its streamed partial replies into gr_chatbot.
with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=gr_chatbot,
        fill_height=True,
    )
if __name__ == "__main__":
    # Bind on all interfaces (required inside the Spaces/Docker container);
    # share=False skips the public gradio tunnel; debug=True surfaces
    # tracebacks in the console.
    demo.launch(
        server_name="0.0.0.0",
        share=False,
        debug=True,
    )