```python
import os

import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load the quantized Mistral 7B Instruct model (GGUF) for CPU inference.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    temperature=0.7,
    gpu_layers=0,            # 0 = run entirely on CPU
    stream=True,             # yield tokens as they are generated
    threads=os.cpu_count(),  # use all available CPU threads
    max_new_tokens=10000,
)
```
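Before wiring the model into a UI, it is worth confirming that token streaming works. A minimal smoke test, assuming the `llm` object above loaded successfully (the prompt text here is arbitrary):

```python
# Because stream=True was set in from_pretrained, calling llm(...) returns
# a generator of text fragments rather than one complete string.
for fragment in llm("[INST]Say hello in one sentence.[/INST]"):
    print(fragment, end="", flush=True)
print()
```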
```python
# Generate model predictions, streaming partial output to the UI.
def predict(message, history):
    history_transformer_format = history + [[message, ""]]

    # Format the chat history as a single Mistral-style instruction prompt.
    messages = "</s>".join(
        "</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
        for item in history_transformer_format
    )
    prompt = f"[INST]{messages}[/INST]"

    # Stream the response, yielding the accumulated text so far.
    message_out = ""
    for text in llm(prompt=prompt):
        message_out += text
        yield message_out
```
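To make the prompt layout concrete, here is a standalone sketch of the string `predict` builds for a one-turn history; the `history` and `message` values are invented for illustration:

```python
# Reproduce predict()'s prompt construction for a sample conversation.
history = [["Hi", "Hello!"]]   # one prior [user, assistant] turn
message = "How are you?"       # the new user message
turns = history + [[message, ""]]
messages = "</s>".join(
    "</s>".join(["\n<|user|>:" + user, "\n<|assistant|>:" + assistant])
    for user, assistant in turns
)
print(f"[INST]{messages}[/INST]")
# Output:
# [INST]
# <|user|>:Hi</s>
# <|assistant|>:Hello!</s>
# <|user|>:How are you?</s>
# <|assistant|>:[/INST]
```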
```python
# Set up and launch the Gradio chat interface.
gr.ChatInterface(
    predict,
    title="Test Mistral 7B",
    description="Ask Mistral any questions",
    examples=['How to cook a fish?', 'Who is the president of US now?'],
).launch()
```
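If streamed tokens do not show up incrementally in your Gradio version, explicitly enabling the request queue before launching is a common fix; this variant is a sketch, and whether it is needed depends on the Gradio release:

```python
# Variant: enable the queue so generator (streaming) outputs are delivered
# incrementally; some Gradio versions require this for streaming handlers.
gr.ChatInterface(
    predict,
    title="Test Mistral 7B",
    description="Ask Mistral any questions",
    examples=['How to cook a fish?', 'Who is the president of US now?'],
).queue().launch()
```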