Spaces:
Runtime error
Runtime error
File size: 1,202 Bytes
6feb027 3536102 6feb027 3536102 6feb027 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
import os
import time
import streamlit as st
def _report_elapsed(label, started):
    """Write ``label`` plus the seconds elapsed since ``started`` via ``st.text``.

    ``started`` must be a value previously obtained from
    ``time.perf_counter()``. The elapsed time is rounded to one decimal place.
    """
    st.text(f"{label}{round(time.perf_counter() - started, 1)}")


def embed_doc(filename):
    """Load a PDF, split it into chunks and index the chunks in a Chroma store.

    The duration of each stage is written to the Streamlit page with
    ``st.text``.

    Args:
        filename: Location of the PDF, passed unchanged to ``PyPDFLoader``.

    Returns:
        The vector store returned by ``Chroma.from_documents``, or ``None``
        when the current working directory contains no entries.
    """
    # NOTE(review): this guard inspects the current working directory, not
    # `filename`. It is kept for backward compatibility -- confirm whether a
    # check on `filename` itself was intended.
    if not os.listdir("."):
        return None

    loader = PyPDFLoader(filename)

    # perf_counter() is monotonic, so the reported durations cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    raw_documents = loader.load()
    # Split text
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        length_function=len,
    )
    documents = text_splitter.split_documents(raw_documents)
    _report_elapsed("Load and split text: ", started)

    # This stage times only the construction of the embeddings object; the
    # label text is kept as-is because it is user-facing output.
    started = time.perf_counter()
    embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-base")
    _report_elapsed("Embedding time: ", started)

    started = time.perf_counter()
    vectorstore = Chroma.from_documents(documents, embeddings)
    _report_elapsed("Vectorizing time: ", started)

    return vectorstore
|