"""Streamlit page that uploads a PDF and stores its page text in ChromaDB."""

from pathlib import Path
from uuid import uuid4

import chromadb
import pymupdf
import streamlit as st

# Where the uploaded PDF is persisted on disk before it is parsed.
UPLOAD_PATH = Path("data/uploaded_file.pdf")


@st.cache_resource
def initdb():
    """Return the ``rag_collection`` Chroma collection, creating it if needed.

    Wrapped in ``st.cache_resource`` so the client and collection are built
    once and the same object is handed back on later calls.

    Returns:
        The collection object returned by
        ``chroma_client.get_or_create_collection(name="rag_collection")``.
    """
    chroma_client = chromadb.Client()
    return chroma_client.get_or_create_collection(name="rag_collection")


st.write("## Local RAG \n Get Insights from your documents")
file = st.file_uploader("Upload your Document Here", type=['pdf'])

if file is not None:
    # Persist the upload to disk. Create the target directory first so a
    # fresh checkout without ``data/`` does not fail on the write.
    UPLOAD_PATH.parent.mkdir(parents=True, exist_ok=True)
    UPLOAD_PATH.write_bytes(file.getvalue())

    # Open by path (not by the write-mode file handle) once the file has been
    # fully written and closed; the context manager closes the document.
    with pymupdf.open(str(UPLOAD_PATH)) as doc:
        # Keep the text as ``str``. Wrapping encoded bytes in ``str()`` would
        # store the bytes *repr* ("b'...'" with escaped newlines/non-ASCII).
        page_texts = [page.get_text() for page in doc]

    # Skip pages without extractable text (e.g. scanned/image-only pages).
    texts = [text for text in page_texts if text.strip()]

    if texts:
        # VECTOR STORE
        # NOTE(review): ids are random, so if this script body runs again with
        # the same upload still selected, the pages are added again -- confirm
        # whether de-duplication is needed.
        collection = initdb()
        text_ids = [str(uuid4()) for _ in texts]
        collection.add(documents=texts, ids=text_ids)
        st.write("Succesfully uploaded document to database.")
    else:
        st.warning("No extractable text was found in the uploaded document.")