Spaces:

LVKinyanjui
/

QueryYourDocs

Sleeping

QueryYourDocs / app.py

Updated requirements and hf model

c336e96 30 days ago

No virus

1.49 kB

	import hashlib
	from uuid import uuid4

	import chromadb
	import pymupdf
	import streamlit as st

	@st.cache_resource
	def initdb():
	    """Return the Chroma collection named "rag_collection".

	    A new Chroma client is built and the collection is fetched, or created
	    when it does not exist yet. The function is wrapped in
	    ``st.cache_resource`` so Streamlit can hand back the same object
	    instead of rebuilding the client each time the script runs.
	    """
	    return chromadb.Client().get_or_create_collection(name="rag_collection")

	# Page body: upload a PDF, index its pages in the vector store, then let
	# the user run a similarity query against the indexed pages.
	st.write("## Local RAG \n Get Insights from your documents")

	file = st.file_uploader("Upload your Document Here to Query", type=['pdf'])

	if file is not None:
	    bytes_data = file.getvalue()

	    # Parse the upload straight from memory: pymupdf.open accepts raw bytes
	    # through `stream=`, so nothing has to be written to a fixed path on
	    # disk (no "data/" directory needed, no file shared between sessions).
	    with pymupdf.open(stream=bytes_data, filetype="pdf") as doc:
	        page_texts = [page.get_text() for page in doc]

	    # Keep the text as str. Wrapping it in str(bytes) would store the
	    # "b'...'" repr, escape sequences included, instead of the page text.
	    # Pages without extractable text (e.g. scanned images) are skipped.
	    pages = [
	        (number, text)
	        for number, text in enumerate(page_texts)
	        if text.strip()
	    ]

	    # VECTOR STORE
	    collection = initdb()

	    if pages:
	        # Streamlit re-executes this script on every interaction, so ids
	        # are derived from the file content and page number and written
	        # with upsert: re-running overwrites the same entries instead of
	        # inserting duplicates under fresh random ids.
	        digest = hashlib.sha256(bytes_data).hexdigest()
	        collection.upsert(
	            documents=[text for _, text in pages],
	            ids=[f"{digest}-{number}" for number, _ in pages],
	        )
	        st.write("Succesfully uploaded document to database.")
	    else:
	        st.warning("No extractable text was found in this document.")

	    # QUERY AREA
	    query = st.text_input("Enter your query")

	    # Never ask for more matches than the collection holds, and skip the
	    # lookup entirely when the query is blank or nothing is indexed.
	    stored = collection.count()
	    if query.strip() and stored:
	        query_results = collection.query(
	            query_texts=[query],
	            n_results=min(5, stored),
	            include=["documents"],
	        )

	        st.write("Database Query Matches")
	        # Rendered explicitly rather than through a bare expression.
	        st.write(query_results)

	# query_text = [" ".join([str(element) for element in inner_list])
	# for inner_list in query_results["documents"]][0]

	# st.write("Database Query Matches")
	# st.markdown(query_text)