ak3ra committed on
Commit
6a076b8
1 Parent(s): d0d7d0e

custom prompts

Browse files
Files changed (4) hide show
  1. app.py +41 -11
  2. rag/rag_pipeline.py +9 -2
  3. requirements.txt +2 -1
  4. utils/prompts.py +98 -0
app.py CHANGED
@@ -2,9 +2,13 @@ import gradio as gr
2
  import json
3
  from rag.rag_pipeline import RAGPipeline
4
  from utils.prompts import highlight_prompt, evidence_based_prompt
 
 
 
 
 
5
  from config import STUDY_FILES
6
 
7
- # Cache for RAG pipelines
8
  rag_cache = {}
9
 
10
 
@@ -25,13 +29,19 @@ def query_rag(study_name, question, prompt_type):
25
  prompt = highlight_prompt
26
  elif prompt_type == "Evidence-based":
27
  prompt = evidence_based_prompt
 
 
 
 
28
  else:
29
  prompt = None
30
 
31
  response = rag.query(question, prompt)
32
- formatted_response = (
33
- f"## Question\n\n{question}\n\n## Answer\n\n{response.response}"
34
- )
 
 
35
 
36
  return formatted_response
37
 
@@ -46,6 +56,10 @@ def get_study_info(study_name):
46
  return "Invalid study name"
47
 
48
 
 
 
 
 
49
  with gr.Blocks() as demo:
50
  gr.Markdown("# RAG Pipeline Demo")
51
 
@@ -53,21 +67,37 @@ with gr.Blocks() as demo:
53
  study_dropdown = gr.Dropdown(
54
  choices=list(STUDY_FILES.keys()), label="Select Study"
55
  )
56
- study_info = gr.Textbox(label="Study Information", interactive=False)
57
 
58
  study_dropdown.change(get_study_info, inputs=[study_dropdown], outputs=[study_info])
59
 
60
  with gr.Row():
61
  question_input = gr.Textbox(label="Enter your question")
62
- prompt_type = gr.Radio(
63
- ["Default", "Highlight", "Evidence-based"],
64
- label="Prompt Type",
65
- value="Default",
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  submit_button = gr.Button("Submit")
69
 
70
- # answer_output = gr.Textbox(label="Answer")
71
  answer_output = gr.Markdown(label="Answer")
72
 
73
  submit_button.click(
 
2
  import json
3
  from rag.rag_pipeline import RAGPipeline
4
  from utils.prompts import highlight_prompt, evidence_based_prompt
5
+ from utils.custom_prompts import (
6
+ study_characteristics_prompt,
7
+ vaccine_coverage_prompt,
8
+ sample_questions,
9
+ )
10
  from config import STUDY_FILES
11
 
 
12
  rag_cache = {}
13
 
14
 
 
29
  prompt = highlight_prompt
30
  elif prompt_type == "Evidence-based":
31
  prompt = evidence_based_prompt
32
+ elif prompt_type == "Study Characteristics":
33
+ prompt = study_characteristics_prompt
34
+ elif prompt_type == "Vaccine Coverage":
35
+ prompt = vaccine_coverage_prompt
36
  else:
37
  prompt = None
38
 
39
  response = rag.query(question, prompt)
40
+
41
+ # Format the response as Markdown
42
+ formatted_response = f"## Question\n\n{response['question']}\n\n## Answer\n\n{response['answer']}\n\n## Sources\n\n"
43
+ for source in response["sources"]:
44
+ formatted_response += f"- {source['title']} ({source['year']})\n"
45
 
46
  return formatted_response
47
 
 
56
  return "Invalid study name"
57
 
58
 
59
+ def update_sample_questions(study_name):
60
+ return gr.Dropdown.update(choices=sample_questions.get(study_name, []))
61
+
62
+
63
  with gr.Blocks() as demo:
64
  gr.Markdown("# RAG Pipeline Demo")
65
 
 
67
  study_dropdown = gr.Dropdown(
68
  choices=list(STUDY_FILES.keys()), label="Select Study"
69
  )
70
+ study_info = gr.Markdown(label="Study Information")
71
 
72
  study_dropdown.change(get_study_info, inputs=[study_dropdown], outputs=[study_info])
73
 
74
  with gr.Row():
75
  question_input = gr.Textbox(label="Enter your question")
76
+ sample_question_dropdown = gr.Dropdown(choices=[], label="Sample Questions")
77
+
78
+ study_dropdown.change(
79
+ update_sample_questions,
80
+ inputs=[study_dropdown],
81
+ outputs=[sample_question_dropdown],
82
+ )
83
+ sample_question_dropdown.change(
84
+ lambda x: x, inputs=[sample_question_dropdown], outputs=[question_input]
85
+ )
86
+
87
+ prompt_type = gr.Radio(
88
+ [
89
+ "Default",
90
+ "Highlight",
91
+ "Evidence-based",
92
+ "Study Characteristics",
93
+ "Vaccine Coverage",
94
+ ],
95
+ label="Prompt Type",
96
+ value="Default",
97
+ )
98
 
99
  submit_button = gr.Button("Submit")
100
 
 
101
  answer_output = gr.Markdown(label="Answer")
102
 
103
  submit_button.click(
rag/rag_pipeline.py CHANGED
@@ -1,6 +1,7 @@
1
  # rag/rag_pipeline.py
2
 
3
  import json
 
4
  from llama_index.core import Document, VectorStoreIndex
5
  from llama_index.core.node_parser import SentenceWindowNodeParser, SentenceSplitter
6
  from llama_index.core import PromptTemplate
@@ -58,7 +59,9 @@ class RAGPipeline:
58
  nodes = node_parser.get_nodes_from_documents(self.documents)
59
  self.index = VectorStoreIndex(nodes)
60
 
61
- def query(self, question, prompt_template=None):
 
 
62
  self.build_index() # This will only build the index if it hasn't been built yet
63
 
64
  if prompt_template is None:
@@ -79,4 +82,8 @@ class RAGPipeline:
79
  )
80
  response = query_engine.query(question)
81
 
82
- return response
 
 
 
 
 
1
  # rag/rag_pipeline.py
2
 
3
  import json
4
+ from typing import Dict, Any
5
  from llama_index.core import Document, VectorStoreIndex
6
  from llama_index.core.node_parser import SentenceWindowNodeParser, SentenceSplitter
7
  from llama_index.core import PromptTemplate
 
59
  nodes = node_parser.get_nodes_from_documents(self.documents)
60
  self.index = VectorStoreIndex(nodes)
61
 
62
+ def query(
63
+ self, question: str, prompt_template: PromptTemplate = None
64
+ ) -> Dict[str, Any]:
65
  self.build_index() # This will only build the index if it hasn't been built yet
66
 
67
  if prompt_template is None:
 
82
  )
83
  response = query_engine.query(question)
84
 
85
+ return {
86
+ "question": question,
87
+ "answer": response.response,
88
+ "sources": [node.metadata for node in response.source_nodes],
89
+ }
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  gradio
2
  llama-index
3
  openai
4
- pandas
 
 
1
  gradio
2
  llama-index
3
  openai
4
+ pandas
5
+ pydantic
utils/prompts.py CHANGED
@@ -1,4 +1,102 @@
1
  from llama_index.core import PromptTemplate
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  highlight_prompt = PromptTemplate(
4
  "Context information is below.\n"
 
1
  from llama_index.core import PromptTemplate
2
+ from typing import Optional, List
3
+ from pydantic import BaseModel, Field
4
+ from llama_index.core.prompts import PromptTemplate
5
+
6
+
7
+ class StudyCharacteristics(BaseModel):
8
+ STUDYID: str
9
+ AUTHOR: str
10
+ YEAR: int
11
+ TITLE: str
12
+ APPENDIX: Optional[str]
13
+ PUBLICATION_TYPE: str
14
+ STUDY_DESIGN: str
15
+ STUDY_AREA_REGION: str
16
+ STUDY_POPULATION: str
17
+ IMMUNISABLE_DISEASE_UNDER_STUDY: str
18
+ ROUTE_OF_VACCINE_ADMINISTRATION: str
19
+ DURATION_OF_STUDY: str
20
+ DURATION_IN_RELATION_TO_COVID19: str
21
+ STUDY_COMMENTS: Optional[str]
22
+
23
+
24
+ class VaccineCoverageVariables(BaseModel):
25
+ STUDYID: str
26
+ AUTHOR: str
27
+ YEAR: int
28
+ TITLE: str
29
+ VACCINE_COVERAGE_RATES: float = Field(..., ge=0, le=100)
30
+ PROPORTION_ADMINISTERED_WITHIN_RECOMMENDED_AGE: float = Field(..., ge=0, le=100)
31
+ IMMUNISATION_UPTAKE: float = Field(..., ge=0, le=100)
32
+ VACCINE_DROP_OUT_RATES: float = Field(..., ge=0, le=100)
33
+ INTENTIONS_TO_VACCINATE: float = Field(..., ge=0, le=100)
34
+ VACCINE_CONFIDENCE: float = Field(..., ge=0, le=100)
35
+ STUDY_COMMENTS: Optional[str]
36
+
37
+
38
+ study_characteristics_prompt = PromptTemplate(
39
+ "Based on the given text, extract the following study characteristics:\n"
40
+ "STUDYID: {studyid}\n"
41
+ "AUTHOR: {author}\n"
42
+ "YEAR: {year}\n"
43
+ "TITLE: {title}\n"
44
+ "APPENDIX: {appendix}\n"
45
+ "PUBLICATION_TYPE: {publication_type}\n"
46
+ "STUDY_DESIGN: {study_design}\n"
47
+ "STUDY_AREA_REGION: {study_area_region}\n"
48
+ "STUDY_POPULATION: {study_population}\n"
49
+ "IMMUNISABLE_DISEASE_UNDER_STUDY: {immunisable_disease}\n"
50
+ "ROUTE_OF_VACCINE_ADMINISTRATION: {route_of_administration}\n"
51
+ "DURATION_OF_STUDY: {duration_of_study}\n"
52
+ "DURATION_IN_RELATION_TO_COVID19: {duration_covid19}\n"
53
+ "STUDY_COMMENTS: {study_comments}\n"
54
+ "Provide the information in a JSON format. If a field is not found, leave it as null."
55
+ )
56
+
57
+ vaccine_coverage_prompt = PromptTemplate(
58
+ "Based on the given text, extract the following vaccine coverage variables:\n"
59
+ "STUDYID: {studyid}\n"
60
+ "AUTHOR: {author}\n"
61
+ "YEAR: {year}\n"
62
+ "TITLE: {title}\n"
63
+ "VACCINE_COVERAGE_RATES: {coverage_rates}\n"
64
+ "PROPORTION_ADMINISTERED_WITHIN_RECOMMENDED_AGE: {proportion_recommended_age}\n"
65
+ "IMMUNISATION_UPTAKE: {immunisation_uptake}\n"
66
+ "VACCINE_DROP_OUT_RATES: {drop_out_rates}\n"
67
+ "INTENTIONS_TO_VACCINATE: {intentions_to_vaccinate}\n"
68
+ "VACCINE_CONFIDENCE: {vaccine_confidence}\n"
69
+ "STUDY_COMMENTS: {study_comments}\n"
70
+ "Provide the information in a JSON format. For numerical values, provide percentages as floats between 0 and 100. If a field is not found, leave it as null."
71
+ )
72
+
73
+ sample_questions = {
74
+ "Vaccine Coverage": [
75
+ "What are the vaccine coverage rates reported in the study?",
76
+ "What proportion of vaccines were administered within the recommended age range?",
77
+ "What is the immunisation uptake reported in the study?",
78
+ "What are the vaccine drop-out rates mentioned in the document?",
79
+ "What are the intentions to vaccinate reported in the study?",
80
+ "How is vaccine confidence described in the document?",
81
+ ],
82
+ "Ebola Virus": [
83
+ "What is the sample size of the study?",
84
+ "What is the type of plasma used in the study?",
85
+ "What is the dosage and frequency of administration of the plasma?",
86
+ "Are there any reported side effects?",
87
+ "What is the change in viral load after treatment?",
88
+ "How many survivors were there in the intervention group compared to the control group?",
89
+ ],
90
+ "Gene Xpert": [
91
+ "What is the main objective of the study?",
92
+ "What is the study design?",
93
+ "What disease condition is being studied?",
94
+ "What are the main outcome measures in the study?",
95
+ "What is the sensitivity and specificity of the Gene Xpert test?",
96
+ "How does the cost of the Gene Xpert testing strategy compare to other methods?",
97
+ ],
98
+ }
99
+
100
 
101
  highlight_prompt = PromptTemplate(
102
  "Context information is below.\n"