Spaces:
Sleeping
Sleeping
File size: 1,431 Bytes
62c56f5 d15855c 62c56f5 d15855c 62c56f5 5059db6 62c56f5 d15855c 62c56f5 5059db6 62c56f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import gradio as gr
from threading import Thread
import os
from ctransformers import AutoModelForCausalLM
# Use about half of the reported CPUs for inference, but never fewer than one:
# os.cpu_count() may return None, and halving a single-CPU count would give 0.
_CPU_THREADS = max(1, (os.cpu_count() or 2) // 2)

# Load the Q4_K_M GGUF build of Mistral 7B Instruct with no layers offloaded
# to the GPU (gpu_layers=0). stream=True lets callers iterate over the
# generated text chunk by chunk instead of waiting for the full completion.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    temperature=0.7,
    gpu_layers=0,
    stream=True,
    threads=_CPU_THREADS,
    max_new_tokens=10000,
)
# Function to generate model predictions.
def predict(message, history):
    """Stream the model's reply to ``message`` given the earlier ``history``.

    Args:
        message: The latest user message (a string).
        history: Earlier turns, each indexable as ``item[0]`` (user text)
            and ``item[1]`` (assistant text).

    Yields:
        The reply accumulated so far. Each yielded value extends the
        previous one, because the chat UI replaces the displayed reply with
        every value the generator yields.
    """
    # Append the new message as a final turn whose assistant slot is empty,
    # so the prompt ends right where the model should start answering.
    turns = list(history) + [[message, ""]]

    # Formatting the input for the model: every turn becomes a user line and
    # an assistant line, all separated by the "</s>" marker.
    # NOTE(review): the Mistral instruct model card documents a per-turn
    # "[INST] ... [/INST]" template; the original single-wrapper format is
    # kept unchanged here -- confirm which one is intended.
    messages = "</s>".join(
        "</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
        for item in turns
    )
    # `messages` is already a plain string; it has no `.content` attribute.
    prompt = f"[INST]{messages}[/INST]"

    reply = ""
    for text in llm(prompt=prompt):
        # Yield the running total rather than the lone chunk, otherwise only
        # the most recent fragment would be shown.
        reply += text
        yield reply
# Build the Gradio chat UI around `predict`, then start the web server.
demo = gr.ChatInterface(
    predict,
    title="Test Mistral 7B",
    description="Ask Mistral any questions",
    examples=["How to cook a fish?", "Who is the president of US now?"],
)
demo.launch()