Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -84,9 +84,6 @@ import os
|
|
84 |
import sys
|
85 |
from langchain.chains import ConversationalRetrievalChain
|
86 |
from langchain.document_loaders import DirectoryLoader, TextLoader
|
87 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
88 |
-
from langchain.indexes import VectorstoreIndexCreator
|
89 |
-
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
|
90 |
from langchain.text_splitter import CharacterTextSplitter
|
91 |
from langchain.vectorstores import Chroma
|
92 |
import gradio as gr
|
@@ -115,10 +112,12 @@ for f in os.listdir("multiple_docs"):
|
|
115 |
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
|
116 |
docs = splitter.split_documents(docs)
|
117 |
|
118 |
-
# Convert the document chunks to
|
119 |
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
120 |
-
embeddings =
|
121 |
-
|
|
|
|
|
122 |
vectorstore.persist()
|
123 |
|
124 |
# Load the Hugging Face model for text generation
|
@@ -171,3 +170,4 @@ demo.launch(debug=True)
|
|
171 |
|
172 |
|
173 |
|
|
|
|
84 |
import sys
|
85 |
from langchain.chains import ConversationalRetrievalChain
|
86 |
from langchain.document_loaders import DirectoryLoader, TextLoader
|
|
|
|
|
|
|
87 |
from langchain.text_splitter import CharacterTextSplitter
|
88 |
from langchain.vectorstores import Chroma
|
89 |
import gradio as gr
|
|
|
112 |
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
|
113 |
docs = splitter.split_documents(docs)
|
114 |
|
115 |
+
# Convert the document chunks to embeddings
|
116 |
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
117 |
+
embeddings = [embedding_model.encode(doc.content) for doc in docs]
|
118 |
+
|
119 |
+
# Save the embeddings to the vector store
|
120 |
+
vectorstore = Chroma.from_embeddings(embeddings=embeddings, documents=docs, persist_directory="./data")
|
121 |
vectorstore.persist()
|
122 |
|
123 |
# Load the Hugging Face model for text generation
|
|
|
170 |
|
171 |
|
172 |
|
173 |
+
|