Spaces:

LVKinyanjui
/

QueryYourDocs

Sleeping

File size: 1,494 Bytes

cd41c7b
 
38f846a
 
 
 
 
e13715a
 
 
cd41c7b
 
 
6dcc394
cd41c7b
 
38f846a
 
cd41c7b
 
 
 
 
e13715a
38f846a
 
e13715a
38f846a
 
 
 
 
6dcc394
 
 
 
 
38f846a
6dcc394
 
 
 
 
 
cd41c7b
6dcc394
c336e96

import hashlib
from uuid import uuid4

import chromadb
import pymupdf
import streamlit as st

@st.cache_resource
def initdb():
    """Return the Chroma collection named "rag_collection".

    A new ``chromadb.Client()`` is created and the collection is fetched,
    or created if it does not exist yet. The function is decorated with
    ``st.cache_resource``, so the returned collection object is cached by
    Streamlit rather than rebuilt on each call.
    """
    return chromadb.Client().get_or_create_collection(name="rag_collection")

st.write("## Local RAG \n Get Insights from your documents")

file = st.file_uploader("Upload your Document Here to Query", type=['pdf'])

if file is not None:
    bytes_data = file.getvalue()

    # Parse the upload straight from memory. PyMuPDF accepts raw bytes via
    # ``stream=``, so no temporary file is needed; the previous approach
    # opened the on-disk copy while it was still being written (unflushed)
    # and required a pre-existing ``data/`` directory.
    with pymupdf.open(stream=bytes_data, filetype="pdf") as doc:
        page_texts = [page.get_text() for page in doc]

    # Keep the real page text (not a ``str(bytes)`` repr) and drop pages with
    # nothing extractable, remembering each page's index for its id.
    pages = [
        (page_index, text)
        for page_index, text in enumerate(page_texts)
        if text.strip()
    ]

    if not pages:
        st.warning("No extractable text was found in this document.")
    else:
        # VECTOR STORE
        collection = initdb()

        # Streamlit re-runs this whole script on every widget interaction.
        # Ids derived from the file content plus page index, combined with
        # ``upsert``, make re-indexing idempotent instead of inserting a
        # fresh duplicate of every page on each rerun.
        digest = hashlib.sha256(bytes_data).hexdigest()
        text_ids = [f"{digest}-{page_index}" for page_index, _ in pages]
        texts = [text for _, text in pages]
        collection.upsert(documents=texts, ids=text_ids)
        st.write("Succesfully uploaded document to database.")

        # QUERY AREA
        query = st.text_input("Enter your query")

        if query.strip():
            query_results = collection.query(
                query_texts=[query],
                # Never ask for more neighbours than the collection holds.
                n_results=min(5, collection.count()),
                include=["documents"],
            )

            st.write("Database Query Matches")
            # Render explicitly rather than relying on Streamlit "magic"
            # for bare expressions, which can be disabled in config.
            st.write(query_results)