import streamlit as st
import latex2markdown
from langchain.docstore.document import Document
import chromadb
from chromadb.config import Settings

import load_model
from load_vectors import load_from_file, load_and_split, create_and_add, load_from_web

persist_directory = load_model.persist_directory

# Embedding model name handed to create_and_add for every ingestion path.
_EMBEDDING_MODEL = "hkunlp/instructor-large"


def format_document(document: Document):
    """Return the dict representation of *document* for display.

    TODO: Implement a nice style.
    """
    return document.dict()


def format_result_set(result):
    """Render a query result in the Streamlit page.

    The ``"result"`` entry is converted from LaTeX to Markdown and written
    out; the ``"source_documents"`` entries are listed below it when the
    user ticks the checkbox.
    """
    st.write(latex2markdown.LaTeX2Markdown(result["result"]).to_markdown())
    agree = st.checkbox('Show source documents')
    source_documents = result["source_documents"]
    if agree:
        st.write('Source Documents:')
        for document in source_documents:
            st.write(format_document(document))


@st.cache_resource
def get_chroma_client():
    """Return the Chroma client persisted at ``persist_directory``.

    Wrapped in ``st.cache_resource`` so the client is built once and reused.
    """
    return chromadb.Client(
        Settings(
            chroma_db_impl="duckdb+parquet",
            persist_directory=persist_directory,
        )
    )


@st.cache_data
def retrieve_collections():
    """Return the names of all collections known to the client, as a tuple.

    The result is cached by ``st.cache_data``; callers that create or delete
    a collection call ``retrieve_collections.clear()`` afterwards.
    """
    client = get_chroma_client()
    return tuple(collection.name for collection in client.list_collections())


def _parse_urls(raw: str) -> list:
    """Split a comma-separated URL string into a list of clean URLs.

    Double quotes and surrounding whitespace are removed and empty entries
    are dropped, so ``'"a", b,'`` becomes ``['a', 'b']``.
    """
    pieces = raw.replace('"', "").split(",")
    return [piece.strip() for piece in pieces if piece.strip()]


def load_files():
    """Render the collection-management UI.

    Lets the user either add documents (uploaded files or web URLs) to an
    existing collection, delete the selected collection, or create a new
    collection.
    """
    client = get_chroma_client()
    option = st.radio(
        "",
        options=["Add Documents", "Start new collection"],
    )
    collections = retrieve_collections()

    if option == "Add Documents":
        selected_collection = st.selectbox(
            'Add to exsisting collection or create a new one', collections
        )
        if st.button('Delete Collection (⚠️ This is destructive and not reversible)'):
            client.delete_collection(name=selected_collection)
            # Drop the cached names so the deleted collection disappears.
            retrieve_collections.clear()
            collections = retrieve_collections()

        # Separate name so the outer "Add Documents" choice is not shadowed.
        source_option = st.radio(
            "",
            options=["Upload Files", "Download Files"],
        )
        if source_option == "Upload Files":
            st.write('Source Documents:')
            uploaded_files = st.file_uploader(
                "Choose a PDF file", accept_multiple_files=True
            )
            chunk_size = st.text_area('chunk Size:', 1000)
            if st.button('Upload'):
                docs = load_from_file(uploaded_files)
                sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
                create_and_add(selected_collection, sub_docs, _EMBEDDING_MODEL)
        else:
            st.write('Source Documents (Comma separated):')
            raw_urls = st.text_area('Urls:', '')
            chunk_size = st.text_area('chunk Size:', 1000)
            if st.button('Upload'):
                # Split first, then clean: removing the commas before the
                # split would fuse all URLs into a single string.
                urls = _parse_urls(raw_urls)
                if urls:
                    docs = load_from_web(urls)
                    sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
                    create_and_add(selected_collection, sub_docs, _EMBEDDING_MODEL)
                else:
                    st.warning("Please enter at least one URL.")
    else:
        collection = st.text_area('Name of your new collection:', '')
        if st.button('Create'):
            if len(collection) > 3:
                # NOTE: the model name used to be appended to the collection
                # name, which caused problems; collection metadata would be
                # a better place for it.
                client.create_collection(collection)
                retrieve_collections.clear()
                st.write("Collection " + collection + " succesfully created.")
            else:
                # Tell the user why nothing happened instead of ignoring it.
                st.warning("Collection name must be longer than 3 characters.")