Spaces:
Sleeping
Sleeping
import os
from services.embed_service.utils import VTTTranscriptLoader, DocumentEmbedder
class EmbeddingService:
    """Load transcript files and push them through a ``DocumentEmbedder``.

    The service is usable as a context manager; entering and exiting only
    print a status line and do not acquire or release any resource in the
    code shown here.
    """

    def __init__(self, conf, pinecone):
        """Store the configuration and unpack the Pinecone handles.

        Args:
            conf: Configuration object, stored as-is on ``self.conf``.
            pinecone: Mapping that must provide the keys ``'connection'``,
                ``'pipeline'`` and ``'embedder'``.

        Raises:
            KeyError: If ``pinecone`` is a dict missing one of those keys.
        """
        self.conf = conf
        # NOTE(review): ``pc`` is stored but not read anywhere in this class.
        self.pc = pinecone['connection']
        self.pipeline = pinecone['pipeline']
        self.embedder = pinecone['embedder']

    def __enter__(self):
        """Announce start-up and return the service itself."""
        print("Start Embedding Service")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Announce shutdown.

        Returns ``None`` implicitly, so an exception raised inside the
        ``with`` body is not suppressed.
        """
        print("Exiting Embedding Service")

    def get_transcripts(self, files):
        """Load the given files through ``VTTTranscriptLoader``.

        Args:
            files: Iterable of objects exposing a ``name`` attribute, which
                is passed to the loader as the file path.

        Returns:
            Whatever ``VTTTranscriptLoader.load()`` returns.
        """
        # Get filepaths and load them in document format
        filepaths = [file.name for file in files]
        loader = VTTTranscriptLoader(filepaths)
        return loader.load()

    def run(self, files):
        """Load the transcripts and embed them via ``DocumentEmbedder``.

        Args:
            files: Iterable of objects exposing a ``name`` attribute; see
                ``get_transcripts``.

        Returns:
            The literal string ``"complete"`` once ``embed()`` has returned.
        """
        # gets the files, cleans them, and loads them into pinecone
        documents = self.get_transcripts(files)
        doc_embedder = DocumentEmbedder(
            pinecone_pipeline=self.pipeline,
            files=documents,
            embedder=self.embedder,
        )
        # uploads them into pinecone
        doc_embedder.embed()
        return "complete"