Spaces:
Running
Running
init stablelm 2 chat
Browse files
app.py
CHANGED
@@ -23,11 +23,12 @@ def parse_args():
|
|
23 |
@spaces.GPU()
|
24 |
def predict(message, history, system_prompt, temperature, max_tokens):
|
25 |
global model, tokenizer, device
|
26 |
-
|
27 |
for human, assistant in history:
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
31 |
stop_tokens = ["<|endoftext|>", "<|im_end|>"]
|
32 |
streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
|
33 |
enc = tokenizer(problem, return_tensors="pt", padding=True, truncation=True)
|
@@ -61,14 +62,14 @@ def predict(message, history, system_prompt, temperature, max_tokens):
|
|
61 |
|
62 |
if __name__ == "__main__":
|
63 |
args = parse_args()
|
64 |
-
tokenizer = AutoTokenizer.from_pretrained("stabilityai/
|
65 |
-
model = AutoModelForCausalLM.from_pretrained("stabilityai/
|
66 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
67 |
model = model.to(device)
|
68 |
gr.ChatInterface(
|
69 |
predict,
|
70 |
-
title="
|
71 |
-
description="
|
72 |
theme="soft",
|
73 |
chatbot=gr.Chatbot(label="Chat History",),
|
74 |
textbox=gr.Textbox(placeholder="input", container=False, scale=7),
|
@@ -76,8 +77,8 @@ if __name__ == "__main__":
|
|
76 |
undo_btn="Delete Previous",
|
77 |
clear_btn="Clear",
|
78 |
additional_inputs=[
|
79 |
-
gr.Textbox("
|
80 |
-
gr.Slider(0, 1, 0.
|
81 |
gr.Slider(100, 2048, 1024, label="Max Tokens"),
|
82 |
],
|
83 |
additional_inputs_accordion_name="Parameters",
|
|
|
23 |
@spaces.GPU()
|
24 |
def predict(message, history, system_prompt, temperature, max_tokens):
|
25 |
global model, tokenizer, device
|
26 |
+
messages = [{'role': 'system', 'content': system_prompt}]
|
27 |
for human, assistant in history:
|
28 |
+
messages.append({'role': 'user', 'content': human})
|
29 |
+
messages.append({'role': 'assistant', 'content': assistant})
|
30 |
+
messages.append({'role': 'user', 'content': message})
|
31 |
+
problem = [tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)]
|
32 |
stop_tokens = ["<|endoftext|>", "<|im_end|>"]
|
33 |
streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
|
34 |
enc = tokenizer(problem, return_tensors="pt", padding=True, truncation=True)
|
|
|
62 |
|
63 |
if __name__ == "__main__":
|
64 |
args = parse_args()
|
65 |
+
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-chat", trust_remote_code=True)
|
66 |
+
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-2-chat", trust_remote_code=True, torch_dtype=torch.bfloat16)
|
67 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
68 |
model = model.to(device)
|
69 |
gr.ChatInterface(
|
70 |
predict,
|
71 |
+
title="StableLM 2 Chat - Demo",
|
72 |
+
description="StableLM 2 Chat - StabilityAI",
|
73 |
theme="soft",
|
74 |
chatbot=gr.Chatbot(label="Chat History",),
|
75 |
textbox=gr.Textbox(placeholder="input", container=False, scale=7),
|
|
|
77 |
undo_btn="Delete Previous",
|
78 |
clear_btn="Clear",
|
79 |
additional_inputs=[
|
80 |
+
gr.Textbox("You are a helpful assistant.", label="System Prompt"),
|
81 |
+
gr.Slider(0, 1, 0.5, label="Temperature"),
|
82 |
gr.Slider(100, 2048, 1024, label="Max Tokens"),
|
83 |
],
|
84 |
additional_inputs_accordion_name="Parameters",
|