Spaces:
Runtime error
Runtime error
File size: 1,427 Bytes
cd41c7b 38f846a e13715a cd41c7b 6dcc394 cd41c7b 38f846a cd41c7b e13715a 38f846a e13715a 38f846a 6dcc394 0e17931 38f846a 0e17931 6dcc394 cd41c7b 6dcc394 c336e96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import hashlib
from uuid import uuid4

import chromadb
import pymupdf
import streamlit as st
@st.cache_resource
def initdb():
    """Return the Chroma collection named "rag_collection".

    A new ``chromadb.Client()`` is created and the collection is fetched,
    or created when it does not exist yet. The function is decorated with
    ``st.cache_resource`` so that Streamlit can reuse the returned object
    instead of calling this body again on each script rerun.
    """
    return chromadb.Client().get_or_create_collection(name="rag_collection")
st.write("## Local RAG \n Get Insights from your documents")
file = st.file_uploader("Upload your Document Here to Query", type=['pdf'])

if file is not None:
    bytes_data = file.getvalue()

    # Parse the PDF straight from the uploaded bytes. No temporary copy is
    # written to disk, so the app does not need a writable "data/" folder.
    with pymupdf.open(stream=bytes_data, filetype="pdf") as doc:
        # Keep each page's text as a real ``str`` (not the repr of encoded
        # bytes) and drop pages that contain no extractable text.
        pages = [
            (page_no, text)
            for page_no, text in enumerate(page.get_text() for page in doc)
            if text.strip()
        ]

    if not pages:
        # Nothing to index (e.g. a scanned PDF without a text layer); stop
        # this run instead of sending an empty batch to the collection.
        st.warning("No extractable text was found in the uploaded PDF.")
        st.stop()

    # VECTOR STORE
    collection = initdb()
    # Ids are derived from the file content and the page index, so the
    # script reruns Streamlit triggers on every interaction overwrite the
    # same entries instead of piling up duplicates under random ids.
    digest = hashlib.sha256(bytes_data).hexdigest()
    text_ids = [f"{digest}-{page_no}" for page_no, _ in pages]
    texts = [text for _, text in pages]
    collection.upsert(documents=texts, ids=text_ids)
    st.write("Succesfully uploaded document to database.")

    # QUERY AREA
    query = st.text_input("Enter your query")
    if query:
        query_results = collection.query(
            query_texts=[query],
            # Never ask for more matches than there are stored documents.
            n_results=min(5, collection.count()),
            include=["documents"],
        )
        st.write("Database Query Matches")
        # Explicit write rather than relying on Streamlit "magic" display
        # of a bare expression.
        st.write(query_results)
# query_text = [" ".join([str(element) for element in inner_list])
# for inner_list in query_results["documents"]][0]
# st.write("Database Query Matches")
# st.markdown(query_text)
|