# Hugging Face Space file header (page residue, kept for provenance):
#   author: themanas021
#   commit: bcc2ddb (verified) - "Update app.py"
import streamlit as st
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import TextLoader
import chromadb
# Clear chromadb's shared client-system cache before any vector store is built.
# NOTE(review): presumably a workaround so each Streamlit rerun starts from a
# fresh in-memory client - confirm against the chromadb version in use.
chromadb.api.client.SharedSystemClient.clear_system_cache()
import os

# The OpenAI-backed objects below are constructed without an explicit key, so
# the key has to come from the environment. Assigning os.getenv(...) back into
# os.environ does nothing when the variable is set and raises TypeError when it
# is not, so check for it explicitly and stop with a readable message instead.
if not os.getenv("OPENAI_API_KEY"):
    st.error(
        "OPENAI_API_KEY is not set. Add it to the environment "
        "(or the Space secrets) and restart the app."
    )
    st.stop()

# Initialize the embeddings and model
embd = OpenAIEmbeddings()
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
# Create the per-session question/answer log the first time the script runs
_HISTORY_KEY = "conversation_history"
if _HISTORY_KEY not in st.session_state:
    st.session_state[_HISTORY_KEY] = []

# Page header
st.title("Text File Question-Answering with History")
st.subheader(
    "Upload a text file and ask questions. "
    "The app will maintain a conversation history."
)

# Upload widget, restricted to .txt files; falsy until a file is provided
uploaded_file = st.file_uploader(
    "Upload a text file",
    type=["txt"],
)
from langchain.docstore.document import Document
def _render_sources(sources, snippet_chars):
    """Write the source label and a leading text snippet for each document.

    Args:
        sources: Documents returned alongside an answer.
        snippet_chars: Maximum number of characters of each document to show.
    """
    for i, doc in enumerate(sources, start=1):
        st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
        st.write(doc.page_content[:snippet_chars])


if uploaded_file:
    # getvalue() returns the whole buffer regardless of the current read
    # position, so the content is still available if the file object has
    # already been read once.
    try:
        file_content = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        st.error("Could not read the file: it is not valid UTF-8 text.")
        st.stop()

    # Convert the content into a LangChain document. The file name is stored
    # under "source" because the source listings below read that metadata key;
    # without it every entry is labelled "Unknown Source".
    document = [
        Document(page_content=file_content, metadata={"source": uploaded_file.name})
    ]

    # Split the loaded document
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    doc_splits = text_splitter.split_documents(document)
    if not doc_splits:
        # Nothing to index, so there is nothing to build a vector store from.
        st.warning("The uploaded file contains no text to index.")
        st.stop()

    # Create a vector store (persist_directory=None: not written to disk)
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="conversation_history",
        embedding=embd,
        persist_directory=None,
    )
    retriever = vectorstore.as_retriever()

    # Initialize the QA chain; source documents are requested so they can be
    # shown next to each answer.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # Question-answering section
    query = st.text_input("Ask a question:")
    if query:
        # invoke() replaces the deprecated direct call on the chain object.
        result = qa_chain.invoke({"query": query})
        answer = result["result"]
        sources = result["source_documents"]

        # Append to conversation history
        st.session_state.conversation_history.append((query, answer, sources))

        # Display the current answer and the first 500 characters of each source
        st.write("**Answer:**", answer)
        st.subheader("Source Documents")
        _render_sources(sources, 500)

    # Display conversation history, with shorter 300-character snippets
    st.subheader("Conversation History")
    for idx, (q, a, s) in enumerate(st.session_state.conversation_history, 1):
        st.write(f"**Q{idx}:** {q}")
        st.write(f"**A{idx}:** {a}")
        st.write(f"**Sources for Q{idx}:**")
        _render_sources(s, 300)