"""Gradio chat front-end for Mixtral-8x7B-Instruct via the HF Inference API."""

import logging

import gradio as gr
import spaces
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)

client = InferenceClient('mistralai/Mixtral-8x7B-Instruct-v0.1')

# End-of-sequence marker that may be streamed as the final token text.
EOS_TAG = "</s>"


@spaces.GPU(duration=60)
def generate_response(chat, kwargs):
    """Stream a completion for ``chat`` and return it as a single string.

    Args:
        chat: The fully formatted prompt passed to ``client.text_generation``.
        kwargs: Extra generation parameters forwarded to
            ``client.text_generation`` (e.g. temperature, max_new_tokens).

    Returns:
        The concatenated text of all streamed tokens, with a trailing
        end-of-sequence tag removed when present.
    """
    stream = client.text_generation(
        chat,
        **kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    output = "".join(response.token.text for response in stream)

    # Strip the tag only when it is really there. The previous check,
    # `endswith("")`, is true for every string and therefore chopped the last
    # four characters off every answer.
    if output.endswith(EOS_TAG):
        output = output[:-len(EOS_TAG)]
    return output


def function(prompt, history=None):
    """Chat callback: build the prompt from the history and query the model.

    Args:
        prompt: The latest user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from earlier
            turns; ``None`` (the default) means no previous turns.

    Returns:
        The model's reply, or an empty string if generation failed.
    """
    turns = [] if history is None else history

    # NOTE(review): the original comment said only an opening tag is kept at
    # the start of the prompt, yet no such tag appears in these literals. It
    # may have been lost together with the end tag; confirm the intended
    # prompt format before changing the strings below.
    past_turns = "".join(
        f"[INST] {user_prompt} [/INST] {bot_response} "
        for user_prompt, bot_response in turns
    )
    chat = past_turns + f"[INST] {prompt} [/INST]"

    kwargs = dict(
        temperature=0.5,
        max_new_tokens=4096,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=1337,
    )

    try:
        return generate_response(chat, kwargs)
    except Exception:
        # Best effort: keep the UI responsive with an empty reply, but record
        # the failure instead of hiding it (and let KeyboardInterrupt /
        # SystemExit propagate, which a bare `except:` would swallow).
        logger.exception("Text generation failed")
        return ''


interface = gr.ChatInterface(
    fn=function,
    chatbot=gr.Chatbot(
        avatar_images=None,
        container=False,
        show_copy_button=True,
        layout='bubble',
        render_markdown=True,
        line_breaks=True,
    ),
    css='h1 {font-size:22px;} h2 {font-size:20px;} h3 {font-size:18px;} h4 {font-size:16px;}',
    autofocus=True,
    fill_height=True,
    analytics_enabled=False,
    submit_btn='Chat',
    stop_btn=None,
    retry_btn=None,
    undo_btn=None,
    clear_btn=None,
)

interface.launch(show_api=True, share=True)