# Captured from a Hugging Face Spaces page (status banner at capture time: "Sleeping").
import logging

import spaces
from huggingface_hub import InferenceClient
import gradio as gr
# Module-level inference client bound to the Mixtral-8x7B instruct model;
# generate_response() below sends every prompt through it.
client = InferenceClient(model='mistralai/Mixtral-8x7B-Instruct-v0.1')
def generate_response(chat, kwargs):
    """Stream a completion for ``chat`` and return it as a single string.

    Args:
        chat: The fully formatted prompt text passed to the module-level
            ``client``.
        kwargs: Mapping of additional generation options; it is expanded
            into the ``client.text_generation`` call.

    Returns:
        The text of every streamed token concatenated in order, with one
        trailing ``</s>`` marker removed if the text ends with it.
    """
    eos_marker = "</s>"
    stream = client.text_generation(
        chat,
        **kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Join once instead of growing the string with ``+=`` for every token.
    output = "".join(response.token.text for response in stream)
    # Check whether the reply ends with the </s> tag and, if so, remove it.
    # The slice uses the marker's own length rather than a hard-coded 4.
    if output.endswith(eos_marker):
        output = output[:-len(eos_marker)]
    return output
def function(prompt, history=None):
    """Format the conversation as an instruct prompt and return the reply.

    Args:
        prompt: The newest user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs for the
            earlier turns. ``None`` (the default) means there are none.

    Returns:
        The text returned by ``generate_response``, or an empty string if
        generation raised an exception (the failure is logged).
    """
    # A ``None`` default avoids sharing one mutable list across calls.
    if history is None:
        history = []

    # Each completed turn is closed with </s> and a new <s> is opened.
    # NOTE(review): confirm the repeated <s> against the model's published
    # chat template; the format is kept exactly as it was.
    past_turns = "".join(
        f"[INST] {user_prompt} [/INST] {bot_response}</s> <s>"
        for user_prompt, bot_response in history
    )
    # The prompt starts with the opening <s> tag and ends with the plain
    # [/INST] marker of the pending user message.
    chat = f"<s>{past_turns}[INST] {prompt} [/INST]"

    kwargs = dict(
        temperature=0.5,
        max_new_tokens=4096,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=1337,
    )

    try:
        return generate_response(chat, kwargs)
    except Exception:
        # Best-effort contract: the caller still gets an empty reply, but the
        # failure is recorded instead of vanishing. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        logging.getLogger(__name__).exception("Text generation failed")
        return ''
# Chat display widget handed to the ChatInterface below.
_chat_window = gr.Chatbot(
    avatar_images=None,
    container=False,
    show_copy_button=True,
    layout='bubble',
    render_markdown=True,
    line_breaks=True,
)

# Font sizes for h1-h4 headings.
_heading_css = 'h1 {font-size:22px;} h2 {font-size:20px;} h3 {font-size:18px;} h4 {font-size:16px;}'

# Chat UI wired to `function`; the submit button is labelled 'Chat' and the
# stop/retry/undo/clear buttons are all passed as None.
interface = gr.ChatInterface(
    fn=function,
    chatbot=_chat_window,
    css=_heading_css,
    autofocus=True,
    fill_height=True,
    analytics_enabled=False,
    submit_btn='Chat',
    stop_btn=None,
    retry_btn=None,
    undo_btn=None,
    clear_btn=None,
)

# Start the app at import time with the API page shown and sharing requested.
interface.launch(show_api=True, share=True)