heikowagner committed on
Commit
4f0dc21
1 Parent(s): fbb697c

remove data

Browse files
.gitignore CHANGED
@@ -6,4 +6,7 @@
6
  root
7
  *.ipynb_checkpoints*
8
  .vscode
9
- /app/mymodels
 
 
 
 
6
  root
7
  *.ipynb_checkpoints*
8
  .vscode
9
+ /app/mymodels
10
+ /app/.cache
11
+ /app/VectorStore*
12
+ *chroma-embeddings.parquet*
app/7fd9b7dc7c8bd6c62fec03923bb04b6ba28e6c736017b43e67f3e808c92438c7.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09bac093b25dfef86ce79a7325d893cf826fb0de7bcdf122b8ab0ec5692425c4
3
- size 3500346
 
 
 
 
app/VectorStore/chroma-embeddings.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:514430ced16df82f6b5355cc14ed912c5af38661418efb691ea8e73e6333ffed
3
- size 5782971
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df81409ab51d0acf63d8e0ad64f6af92daa192afb80667f08fe48fdcba095b1a
3
+ size 9201921
app/VectorStore/index/id_to_uuid_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f39dc0dcfa56bb6584759d134c28bd53ac0165a2873cdd5b9e0ff70244840542
3
- size 131496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25fca5ee26534c9df1cfaf6fe82d8aaea2c65bdc8e81e3c047c7546c7ea33466
3
+ size 153298
app/VectorStore/index/index_52984ff2-d9c3-459b-acc0-0b0aa559d50f.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de4cc0ee24b85680520f48ee0ee673443133127e18533255c60528cfe2f925be
3
- size 13050028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:111dc06e8480103cd32c60d5719a1091ac939ce09dbbdf0a979ed72590dd3095
3
+ size 15211024
app/VectorStore/index/index_metadata_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64da3bdfe4bc7727e421826a6459753a44eabcd37df7fe207fbde1014c0c2fe6
3
  size 74
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:950e65792fc96c61638586850536a6b92d603d0d8e357fa7028dee790eff793d
3
  size 74
app/VectorStore/index/uuid_to_id_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b153d0a7649253a0b5b095f1d126ba5a36b6a650e177b03393bd76cf8b399896
3
- size 153763
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d98cc469e10b5a1d092e046f96a528edf2f3b302da4340e56f27846728b5b853
3
+ size 179261
app/fce3ad7ed281744c55c5838aa5677e15468ae6bae3aa5fe76e6aac20d2c19f2f.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a98a5f02d12b69d1b62de62ffef9fc98fbe229f2369e002e3f47ca78bdefeb3e
3
- size 29884059
 
 
 
 
app/load_vectors.py CHANGED
@@ -94,7 +94,7 @@ def load_from_file(files):
94
 
95
  def load_from_web(urls, cache=True):
96
  docs_list = urls
97
- filename=f"./{sha256(str(urls).encode('utf-8')).hexdigest()}.pkl"
98
 
99
  isFile = os.path.isfile(filename)
100
 
 
94
 
95
  def load_from_web(urls, cache=True):
96
  docs_list = urls
97
+ filename=f"./.cache/{sha256(str(urls).encode('utf-8')).hexdigest()}.pkl"
98
 
99
  isFile = os.path.isfile(filename)
100
 
app/utils.py CHANGED
@@ -4,7 +4,7 @@ from langchain.docstore.document import Document
4
  import chromadb
5
  from chromadb.config import Settings
6
  import load_model
7
- from load_vectors import load_from_file, load_and_split, create_and_add
8
  persist_directory = load_model.persist_directory
9
 
10
  def format_document(document: Document):
@@ -52,15 +52,31 @@ def load_files():
52
  retrieve_collections.clear()
53
  collections = retrieve_collections()
54
 
55
- st.write('Source Documents:')
56
- uploaded_files = st.file_uploader("Choose a PDF file", accept_multiple_files=True)
57
- chunk_size = st.text_area('chunk Size:', 1000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- if st.button('Upload'):
60
- docs = load_from_file(uploaded_files)
61
- sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
62
- create_and_add(selected_collection, sub_docs, "hkunlp/instructor-large")
63
- uploaded_files=None
64
  else:
65
  collection = st.text_area('Name of your new collection:', '')
66
  if st.button('Create'):
 
4
  import chromadb
5
  from chromadb.config import Settings
6
  import load_model
7
+ from load_vectors import load_from_file, load_and_split, create_and_add, load_from_web
8
  persist_directory = load_model.persist_directory
9
 
10
  def format_document(document: Document):
 
52
  retrieve_collections.clear()
53
  collections = retrieve_collections()
54
 
55
+ option = st.radio(
56
+ "",
57
+ options=["Upload Files", "Download Files"],
58
+ )
59
+ if option == "Upload Files":
60
+ st.write('Source Documents:')
61
+ uploaded_files = st.file_uploader("Choose a PDF file", accept_multiple_files=True)
62
+ chunk_size = st.text_area('chunk Size:', 1000)
63
+
64
+ if st.button('Upload'):
65
+ docs = load_from_file(uploaded_files)
66
+ sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
67
+ create_and_add(selected_collection, sub_docs, "hkunlp/instructor-large")
68
+ uploaded_files=None
69
+ else:
70
+ st.write('Source Documents (Comma separated):')
71
+ urls = chunk_size = st.text_area('Urls:', '')
72
+ chunk_size = st.text_area('chunk Size:', 1000)
73
+ urls = urls.replace(",", "" ).replace('"', "" ).split(',')
74
 
75
+ if st.button('Upload'):
76
+ docs = load_from_web(urls)
77
+ sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
78
+ create_and_add(selected_collection, sub_docs, "hkunlp/instructor-large")
79
+ uploaded_files=None
80
  else:
81
  collection = st.text_area('Name of your new collection:', '')
82
  if st.button('Create'):