# NOTE: page-scrape residue ("Spaces: Sleeping") -- not part of the program.
# Imports and process-level setup for the Streamlit question-answering app.
import os

import streamlit as st
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import TextLoader
import chromadb

# Reset Chroma's shared client-system cache at the start of every script run.
# NOTE(review): presumably a workaround for stale in-process Chroma clients
# being reused across Streamlit reruns -- confirm against the Chroma version
# actually deployed.
chromadb.api.client.SharedSystemClient.clear_system_cache()

# The OpenAI embedding and chat wrappers in this script are constructed without
# an explicit key, so they depend on OPENAI_API_KEY being present in the
# environment. Re-assigning the variable to itself did nothing when it was set
# and raised ``TypeError`` (environ values must be str, not None) when it was
# missing, so check for it explicitly and fail with a readable message instead.
if not os.getenv("OPENAI_API_KEY"):
    st.error(
        "OPENAI_API_KEY is not set. Add it to the environment (or the Space "
        "secrets) and restart the app."
    )
    st.stop()
# --- Model clients ---------------------------------------------------------
# Embedding model used to index the uploaded text, and the chat model that
# answers questions (temperature 0).
embd = OpenAIEmbeddings()
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

# --- Per-session state -----------------------------------------------------
# Create the history list once per session; the script appends
# (query, answer, sources) tuples to it further down.
st.session_state.setdefault("conversation_history", [])

# --- Page header -----------------------------------------------------------
st.title("Text File Question-Answering with History")
st.subheader("Upload a text file and ask questions. The app will maintain a conversation history.")

# --- Upload widget ---------------------------------------------------------
# Only .txt files are accepted; the value is falsy until a file is chosen.
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
import hashlib

from langchain.docstore.document import Document

# Index the uploaded file, answer questions about it, and show the history.
#
# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed. The question section must live inside ``if uploaded_file:``
# (it needs ``qa_chain``); the history display is assumed to live there too --
# confirm against the intended layout.
if uploaded_file:
    # getvalue() returns the whole payload regardless of the stream position,
    # unlike read(), which returns b"" once the buffer has been consumed.
    raw_bytes = uploaded_file.getvalue()

    # Streamlit re-executes the script on every interaction. Key the index on
    # the file's name and content so the file is split and embedded only when
    # a different file is uploaded, not on every question.
    file_key = (uploaded_file.name, hashlib.sha256(raw_bytes).hexdigest())

    if st.session_state.get("indexed_file_key") != file_key:
        try:
            # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM.
            file_content = raw_bytes.decode("utf-8-sig")
        except UnicodeDecodeError:
            st.error("The uploaded file is not valid UTF-8 text. Please upload a UTF-8 encoded .txt file.")
            st.stop()

        if not file_content.strip():
            # Nothing to index; stop before the vector store is asked to
            # embed an empty document list.
            st.warning("The uploaded file is empty. Please upload a file that contains text.")
            st.stop()

        # Convert the content into a LangChain document. Recording the file
        # name as "source" gives the source labels below something to show;
        # without metadata they always fell back to "Unknown Source".
        document = [
            Document(
                page_content=file_content,
                metadata={"source": uploaded_file.name},
            )
        ]

        # Split the document into 500-character chunks with no overlap.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
        doc_splits = text_splitter.split_documents(document)

        # Build the vector store (no persist directory is configured) and
        # remember which file it was built from.
        st.session_state.vectorstore = Chroma.from_documents(
            documents=doc_splits,
            collection_name="conversation_history",
            embedding=embd,
            persist_directory=None,
        )
        st.session_state.indexed_file_key = file_key

    vectorstore = st.session_state.vectorstore
    retriever = vectorstore.as_retriever()

    # Initialize the QA chain; source documents are returned with each answer
    # so they can be displayed alongside it.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # Question-answering section
    query = st.text_input("Ask a question:")
    if query:
        # invoke() is the supported entry point; calling the chain object
        # directly is deprecated in LangChain.
        result = qa_chain.invoke({"query": query})
        answer = result["result"]
        sources = result["source_documents"]

        # Append to conversation history
        st.session_state.conversation_history.append((query, answer, sources))

        # Display the current answer
        st.write("**Answer:**", answer)

        # Display the sources
        st.subheader("Source Documents")
        for i, doc in enumerate(sources, start=1):
            st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
            st.write(doc.page_content[:500])  # First 500 characters of the chunk

    # Display conversation history (includes the answer just produced, since
    # it was appended above).
    st.subheader("Conversation History")
    for idx, (q, a, s) in enumerate(st.session_state.conversation_history, 1):
        st.write(f"**Q{idx}:** {q}")
        st.write(f"**A{idx}:** {a}")
        st.write(f"**Sources for Q{idx}:**")
        for i, doc in enumerate(s, start=1):
            st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
            st.write(doc.page_content[:300])  # Show a snippet for brevity