File size: 1,174 Bytes
ce5c00d
c8d35de
ce5c00d
 
31c1456
ce5c00d
298eabd
 
31c1456
ce5c00d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31c1456
ce5c00d
298eabd
ce5c00d
31c1456
ce5c00d
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
from services.embed_service.utils import VTTTranscriptLoader, DocumentEmbedder

class EmbeddingService:
    """Context-managed service that loads VTT transcripts and embeds them.

    The service keeps the handles it is given (connection, pipeline and
    embedder) and wires them into ``VTTTranscriptLoader`` and
    ``DocumentEmbedder`` when :meth:`run` is called.
    """

    def __init__(self, conf, pinecone):
        """Store the configuration and unpack the Pinecone handles.

        Args:
            conf: Configuration object; kept unchanged on ``self.conf``.
            pinecone: Subscriptable object providing the keys
                ``'connection'``, ``'pipeline'`` and ``'embedder'``.
        """
        connection = pinecone['connection']
        pipeline = pinecone['pipeline']
        embedder = pinecone['embedder']

        self.conf = conf
        self.pc = connection
        self.pipeline = pipeline
        self.embedder = embedder

    def __enter__(self):
        """Announce start-up on stdout and hand back the service itself."""
        print("Start Embedding Service")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Announce shutdown on stdout.

        Returns ``None``, so an exception raised inside the ``with`` body
        is not suppressed.
        """
        print("Exiting Embedding Service")
        return None

    def get_transcripts(self, files):
        """Load the given files through ``VTTTranscriptLoader``.

        Args:
            files: Iterable of objects whose ``name`` attribute is used as
                the file path handed to the loader.

        Returns:
            Whatever ``VTTTranscriptLoader(...).load()`` returns
            (presumably the transcripts in document format -- confirm
            against the loader).
        """
        # The loader works on paths, so pull the path out of each file object.
        paths = []
        for handle in files:
            paths.append(handle.name)

        return VTTTranscriptLoader(paths).load()

    def run(self, files):
        """Load the transcripts for ``files`` and pass them to the embedder.

        Args:
            files: Forwarded unchanged to :meth:`get_transcripts`.

        Returns:
            The literal string ``"complete"`` once ``embed()`` has returned.
        """
        documents = self.get_transcripts(files)

        # embed() is expected to upload the documents into Pinecone.
        DocumentEmbedder(
            pinecone_pipeline=self.pipeline,
            files=documents,
            embedder=self.embedder,
        ).embed()

        return "complete"