File size: 1,818 Bytes
220b833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca3417a
c7669eb
898c24c
c7669eb
 
 
 
220b833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import json 
import os

import requests
import streamlit as st

# Required configuration — read with [] (not .get) so the app fails fast at
# startup when either variable is missing.
CHATBOT_ENDPOINT = os.environ["CHATBOT_ENDPOINT"]  # URL of the chatbot inference service
TOKEN = os.environ["TOKEN"]  # auth token included in every request body

def generate(prompt):
    """Stream the chatbot's reply for *prompt*, yielding decoded text pieces.

    POSTs the prompt (plus auth token) to ``CHATBOT_ENDPOINT`` and yields
    UTF-8 text chunks as they arrive. Newlines are rewritten to Markdown
    hard line breaks ("  \\n") so Streamlit renders them. On any request
    failure a single placeholder error string is yielded instead of raising,
    keeping the UI responsive.

    Args:
        prompt: The user instruction to send to the model.

    Yields:
        str: Decoded response fragments, or one error marker on failure.
    """
    try:
        r = requests.post(
            CHATBOT_ENDPOINT,
            data=json.dumps({"instruction": prompt, "token": TOKEN}),
            headers={"content-type": "application/json"},
            timeout=20,
            stream=True,
        )
        # Surface HTTP-level errors (4xx/5xx) instead of streaming an error
        # page to the user as if it were a model answer.
        r.raise_for_status()
        pending = b""
        for chunk in r.iter_content():
            pending += chunk
            try:
                text = pending.decode("utf-8")
            except UnicodeDecodeError:
                # A multi-byte UTF-8 sequence is split across chunk
                # boundaries; keep accumulating bytes until it completes.
                continue
            yield text.replace("\n", "  \n")
            pending = b""
    except requests.RequestException:
        # Narrowed from a bare `except:` so programming errors and
        # GeneratorExit are no longer silently swallowed.
        yield "<<Some errors occurred>>"


st.title("Stockmark-LLM-100b")

intro = """This is a demo site for Stockmark-LLM-100b. This service is running on AWS Inferentia2.
- Pretrained model: [stockmark/stockmark-100b](https://huggingface.co/stockmark/stockmark-100b)
- Instruction tuned model: [stockmark/stockmark-100b-instruct-v0.1](https://huggingface.co/stockmark/stockmark-100b-instruct-v0.1)
"""

st.markdown(intro)

prompt = st.session_state.get("prompt", "")
response = st.session_state.get("response", "")

if prompt == "" or response:
    print("new_session")
    prompt_new = st.text_area("Prompt:")
    if prompt_new:
        st.session_state["prompt"] = prompt_new
        st.session_state["response"] = ""
        st.rerun()
else:
    prompt = st.text_area("Prompt:", value=prompt, disabled=True)

if prompt:

    if response:
        with st.chat_message("assistant"):
            st.write(response)
    else:
        with st.chat_message("assistant"):
            response = st.write_stream(generate(prompt))
        
        st.session_state["response"] = response
        st.rerun()