tykiww committed on
Commit
ce5c00d
·
verified ·
1 Parent(s): afb9844

Create embed.py

Browse files
Files changed (1) hide show
  1. services/embed_service/embed.py +41 -0
services/embed_service/embed.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from ..utilities.transcripts import VTTTranscriptLoader, DocumentEmbedder
3
+ from ..utilities.setup import get_files
4
+
5
+
6
+
7
class EmbeddingService:
    """Load VTT transcript files and hand them to a ``DocumentEmbedder``.

    The service can be used as a context manager; entering and exiting
    only print status messages and do not manage any resources.
    """

    def __init__(self, conf):
        """Store the configuration and fetch the API keys.

        Args:
            conf: Nested mapping. ``run`` reads
                ``conf["embeddings"]["embedding"]`` and
                ``conf["embeddings"]["index_name"]`` from it.
        """
        self.keys = get_files.get_keys()
        self.conf = conf

    def __enter__(self):
        """Announce start-up and return the service itself."""
        print("Start Embedding Service")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Announce shutdown.

        Returns ``None`` implicitly, so an exception raised inside the
        ``with`` body is not suppressed.
        """
        print("Exiting Embedding Service")

    @staticmethod
    def _filepath(file):
        """Return the path for *file*: a ``str`` as-is, otherwise ``file.name``."""
        # Plain string paths used to fail with AttributeError; objects that
        # expose ``.name`` (e.g. uploaded/temporary files) behave as before.
        return file if isinstance(file, str) else file.name

    def get_transcripts(self, files):
        """Load the given transcript files in document format.

        Args:
            files: Iterable of objects exposing a ``name`` attribute that
                holds the file path, or plain ``str`` paths.

        Returns:
            Whatever ``VTTTranscriptLoader(filepaths).load()`` returns.
        """
        filepaths = [self._filepath(file) for file in files]
        loader = VTTTranscriptLoader(filepaths)
        return loader.load()

    def run(self, files):
        """Load the transcripts and embed them into the configured index.

        Args:
            files: Same as for ``get_transcripts``.

        Returns:
            The string ``"complete"`` once ``DocumentEmbedder.embed()`` has
            returned.
        """
        results = self.get_transcripts(files)

        embedding_conf = self.conf["embeddings"]
        doc_embedder = DocumentEmbedder(
            api_keys=self.keys,
            files=results,
            embedding=embedding_conf["embedding"],
            index_name=embedding_conf["index_name"],
        )

        # Per the original author's comments this uploads the documents to
        # Pinecone; the actual target is decided inside DocumentEmbedder.
        doc_embedder.embed()

        return "complete"