Spaces:
Build error
Build error
File size: 4,101 Bytes
7009660 fbb697c 1f84a9a 4f0dc21 fbb697c 7009660 1f84a9a fbb697c 1f84a9a fbb697c 1f84a9a fbb697c 7009660 fbb697c 80fe2b7 fbb697c 4f0dc21 b45426b 4f0dc21 b45426b 4f0dc21 80fe2b7 4f0dc21 80fe2b7 4f0dc21 b45426b 4f0dc21 fbb697c 4f0dc21 af7f187 80fe2b7 4f0dc21 80fe2b7 fbb697c 1f84a9a fbb697c 1f84a9a 80fe2b7 fbb697c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import streamlit as st
import latex2markdown
from langchain.docstore.document import Document
import chromadb
from chromadb.config import Settings
import load_model
from load_model import load_embedding
from load_vectors import load_from_file, load_and_split, create_and_add, load_from_web
# Directory handed to the Chroma client settings in get_chroma_client();
# the value is defined in the load_model module.
persist_directory = load_model.persist_directory
def format_document(document: Document):
    """Convert a source document into the structure shown in the UI.

    TODO: Implement a nice style. For now the document is simply turned
    into whatever its ``dict()`` method returns.
    """
    as_dict = document.dict()
    return as_dict
def format_result_set(result):
    """Render a query result on the Streamlit page.

    The ``"result"`` entry is converted from LaTeX to Markdown and written
    out. A checkbox lets the user additionally list every entry of
    ``"source_documents"``, each formatted with ``format_document``.
    """
    answer_markdown = latex2markdown.LaTeX2Markdown(result["result"]).to_markdown()
    st.write(answer_markdown)

    show_sources = st.checkbox('Show source documents')
    # Looked up before the checkbox is evaluated so that a result without
    # this key fails regardless of the checkbox state.
    source_documents = result["source_documents"]
    if not show_sources:
        return

    st.write('Source Documents:')
    for source in source_documents:
        st.write(format_document(source))
#@st.cache_resource
def get_chroma_client():
    """Create a Chroma client configured with the module's persist directory.

    A new client object is built on every call; the settings select the
    ``duckdb+parquet`` implementation.
    """
    settings = Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_directory,
    )
    return chromadb.Client(settings)
#@st.cache_data
def retrieve_collections():
    """Return a summary of every collection known to the Chroma client.

    Returns:
        A tuple with one dict per collection. Each dict has the keys
        ``"name"`` and ``"model_name"``; ``"model_name"`` is ``None`` when
        the collection carries no metadata or no such metadata entry.
        The tuple is empty when there are no collections.
    """
    client = get_chroma_client()
    all_collections = client.list_collections()
    print(all_collections)
    # Only peek at the first collection when there is one: with no
    # collections the unguarded [0] lookup raised IndexError, which made
    # it impossible to reach the UI for creating the first collection.
    if all_collections:
        print(all_collections[0].metadata)
    return tuple(
        {
            "name": collection.name,
            # Metadata may be missing entirely; fall back to an empty
            # mapping instead of failing the whole listing.
            "model_name": (collection.metadata or {}).get("model_name"),
        }
        for collection in all_collections
    )
def _parse_urls(raw_urls):
    """Turn the comma separated text of the URL field into a list of URLs.

    Double quotes are removed and surrounding whitespace is stripped from
    every entry; entries that end up empty are dropped.
    """
    # Split on the commas first. Removing the commas before splitting (as
    # was done previously) glued all URLs together into a single string.
    cleaned = (part.replace('"', "").strip() for part in raw_urls.split(","))
    return [url for url in cleaned if url]


def _split_and_store(selected_collection, docs, chunk_size):
    """Chunk ``docs`` and add the chunks to the selected collection."""
    sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
    model_name = selected_collection["model_name"]
    create_and_add(
        selected_collection["name"],
        sub_docs,
        model_name,
        {"model_name": model_name},
    )
    st.write("Upload succesful")


def load_files():
    """Render the Streamlit UI for managing collections and their documents.

    The page offers two modes:

    * "Add Documents": pick an existing collection, optionally delete it,
      and upload documents to it either from local files or from a comma
      separated list of URLs.
    * "Start new collection": create a collection with a chosen name and
      embedding model.
    """
    client = get_chroma_client()
    option = st.radio(
        "",
        options=["Add Documents", "Start new collection"],
    )
    collections = retrieve_collections()

    if option != "Add Documents":
        collection = st.text_area('Name of your new collection:', '')
        model_name = st.text_area('Choose the embedding function:', "hkunlp/instructor-large")
        if st.button('Create'):
            if len(collection) > 3:
                ef = load_embedding(model_name)
                metadata = {"loaded_docs": [], "Subject": "Terms Example", "model_name": ef.model_name}
                client.create_collection(collection, embedding_function=ef, metadata=metadata)
                st.write("Collection " + collection + " succesfully created.")
        return

    selected_collection = st.selectbox(
        'Add to exsisting collection or create a new one',
        collections,
    )
    # The selectbox yields None when there is nothing to choose from; the
    # delete/upload actions below all index into the selection.
    if selected_collection is None:
        st.write("No collection available yet. Create one with 'Start new collection' first.")
        return

    if st.button('Delete Collection (⚠️ This is destructive and not reversible)'):
        # NOTE: retrieve_collections() is not cached at the moment, so no
        # cache needs to be cleared or refreshed after the deletion.
        client.delete_collection(name=selected_collection["name"])

    source = st.radio(
        "",
        options=["Upload Files from Local", "Upload Files from Web"],
    )
    if source == "Upload Files from Local":
        st.write('Source Documents:')
        uploaded_files = st.file_uploader("Choose a PDF file", accept_multiple_files=True)
        chunk_size = st.text_area('chunk Size:', 1000)
        if st.button('Upload'):
            docs = load_from_file(uploaded_files)
            _split_and_store(selected_collection, docs, chunk_size)
    else:
        st.write('Urls of Source Documents (Comma separated):')
        raw_urls = st.text_area('Urls:', '')
        chunk_size = st.text_area('chunk Size:', 1000)
        if st.button('Upload'):
            urls = _parse_urls(raw_urls)
            if not urls:
                st.write("Please enter at least one URL.")
                return
            docs = load_from_web(urls)
            _split_and_store(selected_collection, docs, chunk_size)