```python
import os

import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load the quantized Mistral 7B Instruct model (GGUF) for CPU inference.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    temperature=0.7,
    gpu_layers=0,            # 0 = run entirely on CPU
    stream=True,             # yield tokens as they are generated
    threads=os.cpu_count(),  # use all available CPU threads
    max_new_tokens=10000,
)
```
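Before wiring the model into a UI, it is worth confirming that token streaming works. A minimal smoke test, assuming the `llm` object above loaded successfully (the prompt text here is arbitrary):

```python
# Because stream=True was set in from_pretrained, calling llm(...) returns
# a generator of text fragments rather than one complete string.
for fragment in llm("[INST]Say hello in one sentence.[/INST]"):
    print(fragment, end="", flush=True)
print()
```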
```python
# Generate model predictions, streaming partial output to the UI.
def predict(message, history):
    history_transformer_format = history + [[message, ""]]

    # Format the chat history as a single Mistral-style instruction prompt.
    messages = "</s>".join(
        "</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
        for item in history_transformer_format
    )
    prompt = f"[INST]{messages}[/INST]"

    # Stream the response, yielding the accumulated text so far.
    message_out = ""
    for text in llm(prompt=prompt):
        message_out += text
        yield message_out
```
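To make the prompt layout concrete, here is a standalone sketch of the string `predict` builds for a one-turn history; the `history` and `message` values are invented for illustration:

```python
# Reproduce predict()'s prompt construction for a sample conversation.
history = [["Hi", "Hello!"]]   # one prior [user, assistant] turn
message = "How are you?"       # the new user message
turns = history + [[message, ""]]
messages = "</s>".join(
    "</s>".join(["\n<|user|>:" + user, "\n<|assistant|>:" + assistant])
    for user, assistant in turns
)
print(f"[INST]{messages}[/INST]")
# Output:
# [INST]
# <|user|>:Hi</s>
# <|assistant|>:Hello!</s>
# <|user|>:How are you?</s>
# <|assistant|>:[/INST]
```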
```python
# Set up and launch the Gradio chat interface.
gr.ChatInterface(
    predict,
    title="Test Mistral 7B",
    description="Ask Mistral any questions",
    examples=['How to cook a fish?', 'Who is the president of US now?'],
).launch()
```
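If streamed tokens do not show up incrementally in your Gradio version, explicitly enabling the request queue before launching is a common fix; this variant is a sketch, and whether it is needed depends on the Gradio release:

```python
# Variant: enable the queue so generator (streaming) outputs are delivered
# incrementally; some Gradio versions require this for streaming handlers.
gr.ChatInterface(
    predict,
    title="Test Mistral 7B",
    description="Ask Mistral any questions",
    examples=['How to cook a fish?', 'Who is the president of US now?'],
).queue().launch()
```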