File size: 1,427 Bytes
cd41c7b
 
38f846a
 
 
 
 
e13715a
 
 
cd41c7b
 
 
6dcc394
cd41c7b
 
38f846a
 
cd41c7b
 
 
 
 
e13715a
38f846a
 
e13715a
38f846a
 
 
 
 
6dcc394
0e17931
38f846a
0e17931
6dcc394
 
 
 
 
cd41c7b
6dcc394
c336e96
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import streamlit as st
import pymupdf
import chromadb
from uuid import uuid4

@st.cache_resource
def initdb():
    """Return the Chroma collection named "rag_collection".

    A Chroma client is created and the collection is fetched, or created
    when it does not exist yet. The function is decorated with
    ``st.cache_resource``, so Streamlit is expected to reuse the returned
    collection across script reruns instead of rebuilding it.
    """
    client = chromadb.Client()
    return client.get_or_create_collection(name="rag_collection")

st.write("## Local RAG \n Get Insights from your documents")

file = st.file_uploader("Upload your Document Here to Query", type=['pdf'])

if file is not None:
    # VECTOR STORE
    collection = initdb()

    # Streamlit re-executes this script on every widget interaction (for
    # example each time a query is typed). Remember which upload has already
    # been indexed in this session so its pages are not added again under
    # fresh ids on every rerun.
    upload_key = (file.name, file.size)
    if st.session_state.get("indexed_upload") != upload_key:
        # Parse the PDF directly from the uploaded bytes; no temporary file
        # (and no pre-existing "data/" directory) is required.
        with pymupdf.open(stream=file.getvalue(), filetype="pdf") as doc:
            # Keep the page text as str. Wrapping encoded bytes in str()
            # would store the "b'...'" repr instead of the real text.
            page_texts = [page.get_text() for page in doc]

        # Pages without extractable text (blank or image-only) carry nothing
        # to search, so they are not sent to the vector store.
        texts = [text for text in page_texts if text.strip()]

        if texts:
            text_ids = [str(uuid4()) for _ in texts]
            collection.add(documents=texts, ids=text_ids)

        st.session_state["indexed_upload"] = upload_key
        st.session_state["indexed_pages"] = len(texts)

    if st.session_state.get("indexed_pages"):
        st.write("Succesfully uploaded document to database.")

        # QUERY AREA
        query = st.text_input("Enter your query")

        if query:
            query_results = collection.query(
                query_texts=[query],
                n_results=5,
                include=["documents"],
            )

            st.write("Database Query Matches")
            # Render explicitly rather than relying on a bare expression.
            st.write(query_results)
    else:
        st.warning("No extractable text was found in the uploaded document.")