import streamlit as st import pymupdf import chromadb from uuid import uuid4 @st.cache_resource def initdb(): chroma_client = chromadb.Client() collection = chroma_client.get_or_create_collection(name="rag_collection") return collection st.write("## Local RAG \n Get Insights from your documents") file = st.file_uploader("Upload your Document Here to Query", type=['pdf']) if file is not None: # Read file as bytes and save it. # PyMuPDF open can only load from file path bytes_data = file.getvalue() with open("data/uploaded_file.pdf", "wb") as fp: fp.write(bytes_data) doc = pymupdf.open(fp) texts = [str(page.get_text().encode("utf-8")) for page in doc] # VECTOR STORE collection = initdb() text_ids = [str(uuid4()) for text in texts] collection.add(documents=texts, ids=text_ids) st.write("Succesfully uploaded document to database.") # QUERY AREA query = st.text_input("Enter your query") if query: query_results = collection.query( query_texts=[query, ], n_results=5, include=["documents", ] ) st.write("Database Query Matches") query_results # query_text = [" ".join([str(element) for element in inner_list]) # for inner_list in query_results["documents"]][0] # st.write("Database Query Matches") # st.markdown(query_text)