Spaces:
Sleeping
Sleeping
import os | |
from utilities.transcripts import VTTTranscriptLoader, DocumentEmbedder | |
class EmbeddingService:
    """Load transcript files and hand them to ``DocumentEmbedder``.

    The service can be used as a context manager; entering and leaving the
    ``with`` block only prints a status line.
    """

    def __init__(self, conf):
        """Read the API keys from the environment and keep ``conf``.

        ``conf`` is indexed as ``conf["embeddings"]["embedding"]`` and
        ``conf["embeddings"]["index_name"]`` when :meth:`run` is called.
        """
        # Keys are resolved first, so a missing environment variable fails
        # construction before ``conf`` is stored.
        self.keys = self.get_keys()
        self.conf = conf

    def __enter__(self):
        """Announce start-up and return the service itself."""
        print("Start Embedding Service")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Announce shutdown; exceptions from the ``with`` body propagate."""
        print("Exiting Embedding Service")
        return None

    def get_keys(self):
        """Return the API keys needed for loading, keyed by provider name.

        Raises:
            KeyError: if ``PINECONE_TOKEN`` or ``HUGGINGFACE_TOKEN`` is not
                set in the environment.
        """
        pinecone_token = os.environ["PINECONE_TOKEN"]
        huggingface_token = os.environ["HUGGINGFACE_TOKEN"]
        return dict(
            pinecone=pinecone_token,
            huggingface=huggingface_token,
            openai="",  # intentionally blank; reserved for future use
        )

    def get_transcripts(self, files):
        """Load ``files`` through ``VTTTranscriptLoader`` and return the result.

        Each item of ``files`` must expose a ``.name`` attribute, which is
        used as the path passed to the loader.
        """
        paths = [item.name for item in files]
        return VTTTranscriptLoader(paths).load()

    def run(self, files):
        """Load the given files, embed them, and return ``"complete"``.

        Per the original comments the embedder uploads the documents into
        Pinecone; that happens inside ``DocumentEmbedder.embed``.
        """
        documents = self.get_transcripts(files)
        settings = self.conf["embeddings"]
        embedder = DocumentEmbedder(
            api_keys=self.keys,
            files=documents,
            embedding=settings["embedding"],
            index_name=settings["index_name"],
        )
        embedder.embed()
        return "complete"
class QAService:
    """Stub service that stores API keys and configuration.

    ``run`` is currently a placeholder that always returns ``0``.
    """

    def __init__(self, conf):
        """Read the API keys from the environment and keep ``conf``."""
        # The original called a bare ``get_keys()``, which is not defined at
        # module scope and raised NameError on construction.
        self.keys = self.get_keys()
        self.conf = conf

    def get_keys(self):
        """Return the API keys, keyed by provider name.

        Mirrors ``EmbeddingService.get_keys`` so both services read the same
        environment variables.
        NOTE(review): confirm this service needs the same set of keys.

        Raises:
            KeyError: if ``PINECONE_TOKEN`` or ``HUGGINGFACE_TOKEN`` is not
                set in the environment.
        """
        return {
            "pinecone": os.environ["PINECONE_TOKEN"],
            "huggingface": os.environ["HUGGINGFACE_TOKEN"],
            "openai": "",  # leaving this alone for future use.
        }

    def run(self):
        """Placeholder entry point; always returns ``0``.

        The original signature had no ``self``, so calling ``run`` on an
        instance raised TypeError.
        """
        return 0