Spaces:
Running
Running
File size: 1,818 Bytes
220b833 ca3417a c7669eb 898c24c c7669eb 220b833 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import json
import os
import requests
import streamlit as st
# URL of the chatbot inference backend; required at startup (KeyError if unset).
CHATBOT_ENDPOINT = os.environ["CHATBOT_ENDPOINT"]
# Auth token included in every request body; required at startup (KeyError if unset).
TOKEN = os.environ["TOKEN"]
def generate(prompt):
    """Stream the chatbot's reply to *prompt*, yielding decoded text chunks.

    Sends ``prompt`` (plus the auth token) to ``CHATBOT_ENDPOINT`` as a
    streaming POST. Raw bytes are accumulated until they decode as valid
    UTF-8 — ``iter_content`` can split a multi-byte character across
    chunks — and each decoded piece is yielded with newlines padded so
    Streamlit's Markdown renderer keeps the line breaks.

    Yields:
        str: successive fragments of the model's response, or a single
        error marker string if the request fails.
    """
    try:
        r = requests.post(
            CHATBOT_ENDPOINT,
            # json= serializes the payload and sets the content-type header.
            json={"instruction": prompt, "token": TOKEN},
            timeout=20,
            stream=True,
        )
        # Without this, an HTTP error page would be streamed as if it
        # were the model's answer.
        r.raise_for_status()
        pending = b""
        for chunk in r.iter_content():
            pending += chunk
            try:
                text = pending.decode("utf-8")
            except UnicodeDecodeError:
                # Incomplete multi-byte sequence — wait for more bytes.
                continue
            yield text.replace("\n", " \n")
            pending = b""
    except requests.RequestException:
        yield "<<Some errors occured>>"
# --- Page header -----------------------------------------------------------
st.title("Stockmark-LLM-100b")
intro = """This is a demo site for Stockmark-LLM-100b. This service is running on AWS Inferentia2.
- Pretrained model: [stockmark/stockmark-100b](https://huggingface.co/stockmark/stockmark-100b)
- Instruction tuned model: [stockmark/stockmark-100b-instruct-v0.1](https://huggingface.co/stockmark/stockmark-100b-instruct-v0.1)
"""
st.markdown(intro)
# --- Rerun-driven state machine --------------------------------------------
# Session state holds the last submitted prompt and its finished response.
# Each st.rerun() restarts the script from the top with the updated state.
prompt = st.session_state.get("prompt", "")
response = st.session_state.get("response", "")
# Phase 1: no prompt yet, or the previous response is complete -> offer an
# editable text area for a new prompt.
if prompt == "" or response:
    print("new_session")
    prompt_new = st.text_area("Prompt:")
    if prompt_new:
        # Store the new prompt, clear the old response, and rerun so the
        # generation branch below picks it up.
        st.session_state["prompt"] = prompt_new
        st.session_state["response"] = ""
        st.rerun()
else:
    # Phase 2: generation in progress -> show the prompt read-only so it
    # cannot be edited mid-stream.
    prompt = st.text_area("Prompt:", value=prompt, disabled=True)
if prompt:
    if response:
        # Completed response from session state — just redisplay it.
        with st.chat_message("assistant"):
            st.write(response)
    else:
        # No response yet: stream tokens from the backend, then persist the
        # full text and rerun to return to the editable-prompt phase.
        with st.chat_message("assistant"):
            response = st.write_stream(generate(prompt))
            st.session_state["response"] = response
            st.rerun()
|