Spaces: Sleeping
File size: 1,653 bytes
Commit: c330d26
from llama_cpp import Llama
import streamlit as st
# Path to the quantized Vicuna-13B v1.5 weights (llama.cpp GGML v3, q2_K quantization).
model_path = "vicuna-13b-v1.5.ggmlv3.q2_K.bin"
# Load the model once at import time; the single instance is reused across
# every Streamlit rerun in this process.
llama = Llama(model_path)
def generate_response(messages: list) -> str:
    """Run a non-streaming chat completion against the loaded llama.cpp model.

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts, oldest first.

    Returns:
        The assistant's reply text from the first completion choice.
    """
    # max_tokens=-1 tells llama.cpp to generate until the model stops on its own.
    response = llama.create_chat_completion(messages, max_tokens=-1, stream=False)
    # NOTE(review): the original printed the whole raw response here — debug
    # leftover that leaked conversation content to stdout on every call; removed.
    return response['choices'][0]['message']['content']
def main():
    """Streamlit chat UI for the local Vicuna model."""
    st.title("Chat with Vicuna!")

    # Persist the conversation across Streamlit reruns.
    if 'messages' not in st.session_state:
        st.session_state.messages = []

    # Replay the stored transcript so it survives each rerun.
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    # Handle a newly submitted user prompt, if any.
    prompt = st.chat_input("Your Message")
    if prompt:
        # Echo the user's message, then record it in the history.
        with st.chat_message("user"):
            st.markdown(prompt)
        st.session_state.messages.append({"role": "user", "content": prompt})

        with st.chat_message("assistant"):
            placeholder = st.empty()
            reply = ""
            history = [{"role": m["role"], "content": m["content"]}
                       for m in st.session_state.messages]
            # The completion arrives in one shot; replay it character by
            # character with a trailing cursor to mimic a streaming effect.
            for ch in generate_response(history):
                reply += ch
                placeholder.markdown(reply + "❙")
            placeholder.markdown(reply)
        st.session_state.messages.append({"role": "assistant", "content": reply})


if __name__ == "__main__":
    main()
|