Spaces:

zhangshengdong
/

llama-3-chinese-8b-tool

Runtime error

Create app.py

435f938 verified 3 months ago

1.66 kB

	import spaces
	import torch
	from peft import AutoPeftModelForCausalLM
	from transformers import AutoTokenizer
	import gradio as gr

	from utils.chatbot_local import ChatBot

	# Location handed to both from_pretrained() calls below.
	# NOTE(review): presumably a LoRA adapter directory shipped next to this
	# script -- confirm it also contains the tokenizer files.
	MODEL_PATH = 'lora_adapter'

	# Load the model weights as float16; device_map="auto" leaves device
	# placement to the loader.
	model = AutoPeftModelForCausalLM.from_pretrained(
	    MODEL_PATH, torch_dtype=torch.float16, device_map="auto"
	)
	tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

	# Module-level chat wrapper; stream_chat() calls chatbot.chat() on it.
	chatbot = ChatBot(model=model, tokenizer=tokenizer)

	# HTML snippet passed as `placeholder=` to gr.Chatbot further down in this
	# file.
	PLACEHOLDER = """
	<center>
	<p>Hi! How can I help you today?</p>
	</center>
	"""

	# Stylesheet passed as `css=` to gr.Blocks further down in this file.
	# It styles elements carrying the `duplicate-button` class and centers
	# <h3> headings.
	# NOTE(review): no element with the `duplicate-button` class is created in
	# the visible part of this file -- confirm whether the rule is still needed.
	CSS = """
	.duplicate-button {
	margin: auto !important;
	color: white !important;
	background: black !important;
	border-radius: 100vh !important;
	}
	h3 {
	text-align: center;
	}
	"""


	@spaces.GPU()
	def stream_chat(message: str, history: list):
	    """Stream the assistant's reply to ``message`` as growing text.

	    The conversation sent to the model starts with a fixed system prompt,
	    replays every past exchange from ``history`` as a user turn followed
	    by an assistant turn, and ends with ``message`` as the newest user turn.

	    Args:
	        message: The text the user just submitted.
	        history: Previous exchanges; each item is unpacked into exactly two
	            values, ``(prompt, answer)``.

	    Yields:
	        The reply accumulated so far, once per fragment produced by
	        ``chatbot.chat``.
	    """
	    # Log the raw inputs for debugging.
	    print(f'message: {message}')
	    print(f'history: {history}')

	    conversation = [{"role": "system", "content": "You are a helpful assistant."}]
	    for prompt, answer in history:
	        conversation.append({"role": "user", "content": prompt})
	        conversation.append({"role": "assistant", "content": answer})
	    conversation.append({"role": "user", "content": message})

	    # NOTE(review): chatbot.chat(stream=True) is assumed to yield string
	    # fragments -- confirm against utils.chatbot_local.
	    partial_reply = ""
	    for fragment in chatbot.chat(messages=conversation, stream=True):
	        partial_reply += fragment
	        # Emit the whole text so far rather than only the new fragment.
	        yield partial_reply


	# The chat display is created up front so it can be handed to the
	# ChatInterface below with a custom height and placeholder.
	gr_chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)

	# Top-level app: a Blocks container with the custom CSS and the "soft"
	# theme, holding a single chat interface driven by stream_chat().
	with gr.Blocks(css=CSS, theme="soft") as demo:
	    gr.ChatInterface(fn=stream_chat, chatbot=gr_chatbot, fill_height=True)

	# Start the web server only when this file is run as a script: listen on
	# 0.0.0.0, create no public share link, and keep debug mode on.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0", share=False, debug=True)