Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -54,19 +54,17 @@ def predict(message, history, system_prompt, temperature, max_tokens):
|
|
54 |
outputs = []
|
55 |
for text in streamer:
|
56 |
outputs.append(text)
|
57 |
-
|
58 |
-
for tok in stop_tokens:
|
59 |
-
if tok in text:
|
60 |
-
stop = True
|
61 |
-
break
|
62 |
-
if stop:
|
63 |
-
yield "".join(outputs)
|
64 |
break
|
|
|
|
|
|
|
|
|
65 |
|
66 |
if __name__ == "__main__":
|
67 |
args = parse_args()
|
68 |
-
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-
|
69 |
-
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-2-
|
70 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
71 |
model = model.to(device)
|
72 |
gr.ChatInterface(
|
@@ -85,4 +83,4 @@ if __name__ == "__main__":
|
|
85 |
gr.Slider(100, 2048, 1024, label="Max Tokens"),
|
86 |
],
|
87 |
additional_inputs_accordion_name="Parameters",
|
88 |
-
).queue().launch()
|
|
|
54 |
outputs = []
|
55 |
for text in streamer:
|
56 |
outputs.append(text)
|
57 |
+
if text in stop_tokens:
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
break
|
59 |
+
print(text)
|
60 |
+
yield "".join(outputs)
|
61 |
+
|
62 |
+
|
63 |
|
64 |
if __name__ == "__main__":
|
65 |
args = parse_args()
|
66 |
+
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-chat", trust_remote_code=True)
|
67 |
+
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-2-chat", trust_remote_code=True, torch_dtype=torch.bfloat16)
|
68 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
69 |
model = model.to(device)
|
70 |
gr.ChatInterface(
|
|
|
83 |
gr.Slider(100, 2048, 1024, label="Max Tokens"),
|
84 |
],
|
85 |
additional_inputs_accordion_name="Parameters",
|
86 |
+
).queue().launch()
|