minor
rag/rag_pipeline.py  +20 -20
rag/rag_pipeline.py
CHANGED
@@ -14,6 +14,8 @@ class RAGPipeline:
         self.use_semantic_splitter = use_semantic_splitter
         self.documents = None
         self.index = None
+        self.load_documents()
+        self.build_index()
 
     def load_documents(self):
         if self.documents is None:
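With this change, __init__ loads the documents and builds the vector index eagerly, so the indexing cost is paid once at construction time rather than on the first query. A minimal usage sketch (the remaining constructor arguments are not shown in this diff, so the call below is an assumption):

    # Hypothetical usage; other RAGPipeline constructor args are omitted in this diff.
    pipeline = RAGPipeline(use_semantic_splitter=False)  # now loads docs and builds the index
    result = pipeline.query("What population was studied?")  # no indexing latency on first call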
@@ -41,6 +43,23 @@ class RAGPipeline:
                 Document(text=doc_content, id_=f"doc_{index}", metadata=metadata)
             )
 
+    def build_index(self):
+        if self.index is None:
+            sentence_splitter = SentenceSplitter(chunk_size=128, chunk_overlap=13)
+
+            def _split(text: str) -> List[str]:
+                return sentence_splitter.split_text(text)
+
+            node_parser = SentenceWindowNodeParser.from_defaults(
+                sentence_splitter=_split,
+                window_size=3,
+                window_metadata_key="window",
+                original_text_metadata_key="original_text",
+            )
+
+            nodes = node_parser.get_nodes_from_documents(self.documents)
+            self.index = VectorStoreIndex(nodes)
+
     def extract_study_info(self) -> Dict[str, Any]:
         extraction_prompt = PromptTemplate(
             "Based on the given context, please extract the following information about the study:\n"
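build_index now sits directly after document loading. It chunks text with SentenceSplitter (128-token chunks, 13-token overlap), then builds sentence-window nodes: each node's text is a single sentence, and its metadata carries the 3 sentences on either side, so retrieval matches the small sentence while the stored window gives the LLM wider context. A self-contained sketch of the parser (import paths assume a recent llama-index release where these classes live under llama_index.core; older versions export them from the top-level package):

    from llama_index.core import Document
    from llama_index.core.node_parser import SentenceWindowNodeParser

    parser = SentenceWindowNodeParser.from_defaults(
        window_size=3,                               # 3 sentences of context on each side
        window_metadata_key="window",                # where the surrounding text is stored
        original_text_metadata_key="original_text",  # the bare sentence itself
    )
    doc = Document(text="First sentence. Second one. Third one. Fourth one.")
    nodes = parser.get_nodes_from_documents([doc])
    print(nodes[1].metadata["original_text"])  # -> "Second one."
    print(nodes[1].metadata["window"])         # -> all four sentences, joined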
@@ -74,29 +93,9 @@ class RAGPipeline:
 
         return extracted_info
 
-    def build_index(self):
-        if self.index is None:
-            self.load_documents()
-            sentence_splitter = SentenceSplitter(chunk_size=128, chunk_overlap=13)
-
-            def _split(text: str) -> List[str]:
-                return sentence_splitter.split_text(text)
-
-            node_parser = SentenceWindowNodeParser.from_defaults(
-                sentence_splitter=_split,
-                window_size=3,
-                window_metadata_key="window",
-                original_text_metadata_key="original_text",
-            )
-
-            nodes = node_parser.get_nodes_from_documents(self.documents)
-            self.index = VectorStoreIndex(nodes)
-
     def query(
         self, question: str, prompt_template: PromptTemplate = None, **kwargs
     ) -> Dict[str, Any]:
-        self.build_index()  # This will only build the index if it hasn't been built yet
-
         if prompt_template is None:
             prompt_template = PromptTemplate(
                 "Context information is below.\n"
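The default prompt is truncated in this diff after its opening line; it follows the shape of LlamaIndex's stock text-QA template, in which {context_str} is replaced by the retrieved node text and {query_str} by the question. A sketch of supplying a custom template (the exact wording in this file is not shown, so the string below is illustrative):

    from llama_index.core import PromptTemplate

    qa_template = PromptTemplate(
        "Context information is below.\n"
        "---------------------\n"
        "{context_str}\n"
        "---------------------\n"
        "Given the context information and not prior knowledge, answer the query.\n"
        "Query: {query_str}\nAnswer: "
    )
    # result = pipeline.query("What was the sample size?", prompt_template=qa_template)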
@@ -113,6 +112,7 @@ class RAGPipeline:
         query_engine = self.index.as_query_engine(
             text_qa_template=prompt_template, similarity_top_k=5
         )
+
         # Use kwargs to pass additional parameters to the query
         response = query_engine.query(question, **kwargs)
 
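as_query_engine composes a retriever with a response synthesizer: similarity_top_k=5 fetches the five nearest sentence nodes, and text_qa_template controls the answer prompt. One caveat when reading this hunk: with sentence-window nodes, a MetadataReplacementPostProcessor is typically added so the LLM sees each node's stored window instead of the lone sentence. The commit does not do this; the sketch below is a suggested addition, not part of the change:

    from llama_index.core.postprocessor import MetadataReplacementPostProcessor

    query_engine = index.as_query_engine(
        similarity_top_k=5,
        # Replace each retrieved sentence with its surrounding window before synthesis.
        node_postprocessors=[MetadataReplacementPostProcessor(target_metadata_key="window")],
    )
    response = query_engine.query("What outcomes were measured?")
    print(response.response)           # synthesized answer text
    print(response.source_nodes[0])    # highest-scoring retrieved node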