# Spaces: Runtime error
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.vectorstores import Chroma | |
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings | |
import os | |
import time | |
import streamlit as st | |
def embed_doc(filename):
    """Load a PDF, split it into chunks and index the chunks in Chroma.

    The elapsed time of each stage is written to the Streamlit page with
    ``st.text``, rounded to one decimal place.

    Args:
        filename: Path of the PDF file handed to ``PyPDFLoader``.

    Returns:
        The vector store produced by ``Chroma.from_documents``, or ``None``
        when the current working directory contains no entries.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; this assumes the directory check guarded the whole
    # pipeline -- confirm against the original file.
    if not os.listdir("."):
        return None

    pdf_loader = PyPDFLoader(filename)

    # Stage 1: read the PDF and cut it into 1000-character chunks, no overlap.
    stage_began = time.time()
    pages = pdf_loader.load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        length_function=len,
    )
    chunks = splitter.split_documents(pages)
    stage_ended = time.time()
    split_seconds = round(stage_ended - stage_began, 1)
    st.text("Load and split text: " + str(split_seconds))

    # Stage 2: this timer only covers constructing the embedding model
    # object; the documents themselves are embedded during stage 3.
    stage_began = time.time()
    embedder = HuggingFaceEmbeddings(model_name="intfloat/e5-base")
    stage_ended = time.time()
    model_seconds = round(stage_ended - stage_began, 1)
    st.text("Embedding time: " + str(model_seconds))

    # Stage 3: build the Chroma store from the chunks.
    stage_began = time.time()
    store = Chroma.from_documents(chunks, embedder)
    stage_ended = time.time()
    index_seconds = round(stage_ended - stage_began, 1)
    st.text("Vectorizing time: " + str(index_seconds))

    return store