Update app.py
app.py CHANGED
@@ -10,7 +10,7 @@ llm = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GG
     temperature=0.7,
     gpu_layers=0,
     stream=True,
-    threads=int(os.cpu_count()
+    threads=int(os.cpu_count()),
     max_new_tokens=10000)
 
 
@@ -23,8 +23,10 @@ def predict(message, history):
         for item in history_transformer_format])
 
     prompt = f"[INST]{messages}[/INST]"
+    message_out = ""
     for text in llm(prompt=prompt):
-
+        message_out += text
+        yield message_out
 
 # Setting up the Gradio chat interface.
 gr.ChatInterface(predict,
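For context, here is a minimal sketch of what app.py plausibly looks like after this commit. Only the lines visible in the diff are confirmed; the imports, the model_file and model_type arguments, the body of the history join, and the full repo id (truncated to "...v0.1-GG" in the hunk header) are assumptions based on the usual ctransformers + Gradio streaming pattern.

import os

import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load the GGUF model on CPU. The repo id is truncated in the diff header;
# "TheBloke/Mistral-7B-Instruct-v0.1-GGUF" and the model_file/model_type
# arguments below are assumptions.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",  # assumed quant file
    model_type="mistral",                               # assumed
    temperature=0.7,
    gpu_layers=0,                 # CPU-only inference
    stream=True,                  # llm(...) yields tokens incrementally
    threads=int(os.cpu_count()),  # the syntax fix from the first hunk
    max_new_tokens=10000)

def predict(message, history):
    # Assumption: flatten the chat history into one prompt string; the diff
    # only shows the closing line of this join.
    history_transformer_format = history + [[message, ""]]
    messages = "".join(["".join(["\n<human>: " + item[0], "\n<bot>: " + item[1]])
                        for item in history_transformer_format])

    prompt = f"[INST]{messages}[/INST]"
    # The second hunk: accumulate streamed tokens and yield the growing
    # string so the chat window re-renders the partial reply each step.
    message_out = ""
    for text in llm(prompt=prompt):
        message_out += text
        yield message_out

# Setting up the Gradio chat interface (further kwargs are truncated in
# the diff).
gr.ChatInterface(predict).launch()

Because predict is a generator that yields the accumulated message_out on every token, gr.ChatInterface streams the partial reply into the UI instead of waiting for the full completion; the first hunk is simply a syntax fix, closing the int(os.cpu_count()) call that previously left the argument list unbalanced.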