Spaces:
Runtime error
Runtime error
import hashlib
from uuid import uuid4

import chromadb
import pymupdf
import streamlit as st
def initdb(collection_name: str = "rag_collection") -> "chromadb.Collection":
    """Return the Chroma collection used as the document vector store.

    The collection is looked up by name and created on first use, so
    calling this function repeatedly with the same name is safe.

    Args:
        collection_name: Name of the collection to fetch or create.
            Defaults to ``"rag_collection"``.

    Returns:
        The collection object returned by ``get_or_create_collection``.
    """
    chroma_client = chromadb.Client()
    return chroma_client.get_or_create_collection(name=collection_name)
st.write("## Local RAG \n Get Insights from your documents")
file = st.file_uploader("Upload your Document Here to Query", type=['pdf'])

if file is not None:
    # PyMuPDF can open a PDF straight from memory via ``stream=``, so the
    # upload never has to be written to disk (and no ``data/`` directory
    # has to exist).
    bytes_data = file.getvalue()
    with pymupdf.open(stream=bytes_data, filetype="pdf") as doc:
        # Keep the extracted text as ``str``; wrapping the encoded bytes in
        # ``str()`` would store the ``b'...'`` repr instead of the text.
        page_texts = [page.get_text() for page in doc]

    # Build IDs from the file's content hash plus the page index. The script
    # is re-executed on every widget interaction, so random IDs would insert
    # the same pages again on each rerun.
    digest = hashlib.sha256(bytes_data).hexdigest()
    pages = {
        f"{digest}-{page_number}": text
        for page_number, text in enumerate(page_texts)
        if text.strip()  # skip pages with no extractable text
    }

    if not pages:
        st.warning("No extractable text was found in the uploaded document.")
    else:
        # VECTOR STORE
        collection = initdb()
        # Only add pages that are not stored yet, so reruns neither duplicate
        # entries nor recompute embeddings.
        already_stored = set(collection.get(ids=list(pages))["ids"])
        missing = {
            page_id: text
            for page_id, text in pages.items()
            if page_id not in already_stored
        }
        if missing:
            collection.add(
                documents=list(missing.values()),
                ids=list(missing),
            )
        st.write("Succesfully uploaded document to database.")

        # QUERY AREA
        query = st.text_input("Enter your query")
        if query:
            query_results = collection.query(
                query_texts=[query],
                # Never request more matches than there are stored pages.
                n_results=min(5, collection.count()),
                include=["documents"],
            )
            st.write("Database Query Matches")
            st.write(query_results)