ak3ra committed on
Commit
122cee1
1 Parent(s): 5f52091
Files changed (1) hide show
  1. rag/rag_pipeline.py +20 -20
rag/rag_pipeline.py CHANGED
@@ -14,6 +14,8 @@ class RAGPipeline:
14
  self.use_semantic_splitter = use_semantic_splitter
15
  self.documents = None
16
  self.index = None
 
 
17
 
18
  def load_documents(self):
19
  if self.documents is None:
@@ -41,6 +43,23 @@ class RAGPipeline:
41
  Document(text=doc_content, id_=f"doc_{index}", metadata=metadata)
42
  )
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def extract_study_info(self) -> Dict[str, Any]:
45
  extraction_prompt = PromptTemplate(
46
  "Based on the given context, please extract the following information about the study:\n"
@@ -74,29 +93,9 @@ class RAGPipeline:
74
 
75
  return extracted_info
76
 
77
- def build_index(self):
78
- if self.index is None:
79
- self.load_documents()
80
- sentence_splitter = SentenceSplitter(chunk_size=128, chunk_overlap=13)
81
-
82
- def _split(text: str) -> List[str]:
83
- return sentence_splitter.split_text(text)
84
-
85
- node_parser = SentenceWindowNodeParser.from_defaults(
86
- sentence_splitter=_split,
87
- window_size=3,
88
- window_metadata_key="window",
89
- original_text_metadata_key="original_text",
90
- )
91
-
92
- nodes = node_parser.get_nodes_from_documents(self.documents)
93
- self.index = VectorStoreIndex(nodes)
94
-
95
  def query(
96
  self, question: str, prompt_template: PromptTemplate = None, **kwargs
97
  ) -> Dict[str, Any]:
98
- self.build_index() # This will only build the index if it hasn't been built yet
99
-
100
  if prompt_template is None:
101
  prompt_template = PromptTemplate(
102
  "Context information is below.\n"
@@ -113,6 +112,7 @@ class RAGPipeline:
113
  query_engine = self.index.as_query_engine(
114
  text_qa_template=prompt_template, similarity_top_k=5
115
  )
 
116
  # Use kwargs to pass additional parameters to the query
117
  response = query_engine.query(question, **kwargs)
118
 
 
14
  self.use_semantic_splitter = use_semantic_splitter
15
  self.documents = None
16
  self.index = None
17
+ self.load_documents()
18
+ self.build_index()
19
 
20
  def load_documents(self):
21
  if self.documents is None:
 
43
  Document(text=doc_content, id_=f"doc_{index}", metadata=metadata)
44
  )
45
 
46
+ def build_index(self):
47
+ if self.index is None:
48
+ sentence_splitter = SentenceSplitter(chunk_size=128, chunk_overlap=13)
49
+
50
+ def _split(text: str) -> List[str]:
51
+ return sentence_splitter.split_text(text)
52
+
53
+ node_parser = SentenceWindowNodeParser.from_defaults(
54
+ sentence_splitter=_split,
55
+ window_size=3,
56
+ window_metadata_key="window",
57
+ original_text_metadata_key="original_text",
58
+ )
59
+
60
+ nodes = node_parser.get_nodes_from_documents(self.documents)
61
+ self.index = VectorStoreIndex(nodes)
62
+
63
  def extract_study_info(self) -> Dict[str, Any]:
64
  extraction_prompt = PromptTemplate(
65
  "Based on the given context, please extract the following information about the study:\n"
 
93
 
94
  return extracted_info
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  def query(
97
  self, question: str, prompt_template: PromptTemplate = None, **kwargs
98
  ) -> Dict[str, Any]:
 
 
99
  if prompt_template is None:
100
  prompt_template = PromptTemplate(
101
  "Context information is below.\n"
 
112
  query_engine = self.index.as_query_engine(
113
  text_qa_template=prompt_template, similarity_top_k=5
114
  )
115
+
116
  # Use kwargs to pass additional parameters to the query
117
  response = query_engine.query(question, **kwargs)
118