Spaces:

Sunbird
/

acres

Running

App Files Files Community

ak3ra committed on Sep 4

Commit

5f52091

•

1 Parent(s): 9f2191f

prompt params

Browse files

Files changed (2) hide show

app.py +15 -23
rag/rag_pipeline.py +34 -1

app.py CHANGED Viewed

@@ -25,31 +25,15 @@ def get_rag_pipeline(study_name):
     return rag_cache[study_name]
-def query_rag(study_name, question, prompt_type):
     rag = get_rag_pipeline(study_name)
     # Prepare a dictionary with all possible prompt parameters
     prompt_params = {
-        "studyid": "",  # retrieve or generate a study ID?
-        "author": "",
-        "year": "",
-        "title": "",
-        "appendix": "",
-        "publication_type": "",
-        "study_design": "",
-        "study_area_region": "",
-        "study_population": "",
-        "immunisable_disease": "",
-        "route_of_administration": "",
-        "duration_of_study": "",
-        "duration_covid19": "",
-        "study_comments": "",
-        "coverage_rates": "",
-        "proportion_recommended_age": "",
-        "immunisation_uptake": "",
-        "drop_out_rates": "",
-        "intentions_to_vaccinate": "",
-        "vaccine_confidence": "",
         "query_str": question,  # Add the question to the prompt parameters
     }
@@ -64,12 +48,20 @@ def query_rag(study_name, question, prompt_type):
     else:
         prompt = None
     response = rag.query(question, prompt, **prompt_params)
     # Format the response as Markdown
-    formatted_response = f"## Question\n\n{response['question']}\n\n## Answer\n\n{response['answer']}\n\n## Sources\n\n"
     for source in response["sources"]:
-        formatted_response += f"- {source['title']} ({source['year']})\n"
     return formatted_response

     return rag_cache[study_name]
+def query_rag(study_name: str, question: str, prompt_type: str) -> str:
     rag = get_rag_pipeline(study_name)
+    # Extract study information using RAG
+    study_info = rag.extract_study_info()
     # Prepare a dictionary with all possible prompt parameters
     prompt_params = {
+        **study_info,  # Unpack the extracted study info
         "query_str": question,  # Add the question to the prompt parameters
     }
     else:
         prompt = None
+    # Use the prompt_params in the query
     response = rag.query(question, prompt, **prompt_params)
     # Format the response as Markdown
+    formatted_response = f"## Question\n\n{question}\n\n## Answer\n\n{response['answer']}\n\n## Sources\n\n"
     for source in response["sources"]:
+        formatted_response += (
+            f"- {source['title']} ({source.get('year', 'Year not specified')})\n"
+        )
+    # Add extracted study information to the response
+    formatted_response += "\n## Extracted Study Information\n\n"
+    for key, value in study_info.items():
+        formatted_response += f"- **{key.replace('_', ' ').title()}**: {value}\n"
     return formatted_response

rag/rag_pipeline.py CHANGED Viewed

@@ -41,6 +41,39 @@ class RAGPipeline:
                     Document(text=doc_content, id_=f"doc_{index}", metadata=metadata)
                 )
     def build_index(self):
         if self.index is None:
             self.load_documents()
@@ -80,7 +113,7 @@ class RAGPipeline:
         query_engine = self.index.as_query_engine(
             text_qa_template=prompt_template, similarity_top_k=5
         )
-        # response = query_engine.query(question)
         response = query_engine.query(question, **kwargs)
         return {

                     Document(text=doc_content, id_=f"doc_{index}", metadata=metadata)
                 )
+    def extract_study_info(self) -> Dict[str, Any]:
+        extraction_prompt = PromptTemplate(
+            "Based on the given context, please extract the following information about the study:\n"
+            "1. Study ID\n"
+            "2. Author(s)\n"
+            "3. Year\n"
+            "4. Title\n"
+            "5. Study design\n"
+            "6. Study area/region\n"
+            "7. Study population\n"
+            "8. Disease under study\n"
+            "9. Duration of study\n"
+            "If the information is not available, please respond with 'Not found' for that field.\n"
+            "Context: {context_str}\n"
+            "Extracted information:"
+        )
+        query_engine = self.index.as_query_engine(
+            text_qa_template=extraction_prompt, similarity_top_k=5
+        )
+        response = query_engine.query("Extract study information")
+        # Parse the response to extract key-value pairs
+        lines = response.response.split("\n")
+        extracted_info = {}
+        for line in lines:
+            if ":" in line:
+                key, value = line.split(":", 1)
+                extracted_info[key.strip().lower().replace(" ", "_")] = value.strip()
+        return extracted_info
     def build_index(self):
         if self.index is None:
             self.load_documents()
         query_engine = self.index.as_query_engine(
             text_qa_template=prompt_template, similarity_top_k=5
         )
+        # Use kwargs to pass additional parameters to the query
         response = query_engine.query(question, **kwargs)
         return {