Spaces:
Sleeping
Sleeping
import os | |
from services.embed_service.utils import VTTTranscriptLoader, DocumentEmbedder | |
class EmbeddingService:
    """Load transcript files and pass them to a ``DocumentEmbedder``.

    The service can be used as a context manager; entering and leaving
    the ``with`` block only prints a status line.
    """

    def __init__(self, conf, pinecone, session_key):
        """Resolve the namespace and fetch the handles from ``pinecone``.

        Args:
            conf: Nested mapping. ``conf["embeddings"]["override"]`` is read
                as a flag; when it is truthy,
                ``conf["embeddings"]["demo_namespace"]`` is used as the
                namespace instead of ``session_key``.
            pinecone: Object whose ``run(namespace=...)`` returns a mapping
                with the keys ``'connection'``, ``'pipeline'`` and
                ``'embedder'``.
            session_key: Namespace to use when the override flag is falsy.

        Raises:
            KeyError: If ``conf`` or the mapping returned by
                ``pinecone.run`` lacks one of the keys named above.
        """
        self.conf = conf
        embeddings_conf = self.conf["embeddings"]
        # The override flag pins every caller to the shared demo namespace;
        # demo_namespace is only looked up when the flag is set.
        self.sess_key = (
            embeddings_conf["demo_namespace"]
            if embeddings_conf["override"]
            else session_key
        )
        self.pinecones = pinecone.run(namespace=self.sess_key)
        self.pc = self.pinecones['connection']
        self.pipeline = self.pinecones['pipeline']
        self.embedder = self.pinecones['embedder']

    def __enter__(self):
        """Print a start message and return this service."""
        print("Start Embedding Service")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Print an exit message.

        Returns ``None``, so an exception raised inside the ``with`` block
        is not suppressed.
        """
        print("Exiting Embedding Service")

    def get_transcripts(self, files):
        """Load the given files with ``VTTTranscriptLoader``.

        Args:
            files: Iterable of objects exposing a ``name`` attribute, which
                is passed to the loader as the file path.
                NOTE(review): presumably uploaded-file handles - confirm
                against the caller.

        Returns:
            Whatever ``VTTTranscriptLoader(filepaths).load()`` returns.
        """
        filepaths = [file.name for file in files]
        return VTTTranscriptLoader(filepaths).load()

    def run(self, files):
        """Load ``files`` and embed them through the configured pipeline.

        Args:
            files: Same iterable accepted by ``get_transcripts``.

        Returns:
            The string ``"complete"`` once ``DocumentEmbedder.embed()`` has
            returned.
        """
        results = self.get_transcripts(files)
        doc_embedder = DocumentEmbedder(
            pinecone_pipeline=self.pipeline,
            files=results,
            embedder=self.embedder,
        )
        # Per the original comments, embed() is the step that uploads the
        # documents into Pinecone.
        doc_embedder.embed()
        return "complete"