Spaces: Sunbird / acres (Running)

App Files Community

ak3ra committed on Sep 10

Commit 1eb5783 • 1 Parent(s): 52a64e1

final version of chat interface

Browse files

Files changed (6) hide show

.gitignore +162 -2
__pycache__/app.cpython-311.pyc +0 -0
app.py +179 -89
utils/__pycache__/prompts.cpython-311.pyc +0 -0
utils/helpers.py +70 -33
utils/prompts.py +17 -0

.gitignore CHANGED Viewed

@@ -1,2 +1,162 @@
-rag/__pycache__/rag_pipeline.cpython-311.pyc
-utils/__pycache__/prompts.cpython-311.pyc

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

__pycache__/app.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ

app.py CHANGED Viewed

@@ -1,128 +1,218 @@
 import gradio as gr
-from rag.rag_pipeline import RAGPipeline
-from utils.prompts import highlight_prompt, evidence_based_prompt, sample_questions
 from config import STUDY_FILES
-import json
-# Cache for RAG pipelines
 rag_cache = {}
-def get_rag_pipeline(study_name):
     if study_name not in rag_cache:
         study_file = STUDY_FILES.get(study_name)
-        if study_file:
-            rag_cache[study_name] = RAGPipeline(study_file)
-        else:
             raise ValueError(f"Invalid study name: {study_name}")
     return rag_cache[study_name]
-def chat_function(message, history, study_name, prompt_type):
     if not message.strip():
         return "Please enter a valid query."
     rag = get_rag_pipeline(study_name)
-    prompt = (
-        highlight_prompt
-        if prompt_type == "Highlight"
-        else evidence_based_prompt if prompt_type == "Evidence-based" else None
-    )
     response = rag.query(message, prompt_template=prompt)
     return response.response
-def get_study_info(study_name):
     study_file = STUDY_FILES.get(study_name)
-    if study_file:
-        with open(study_file, "r") as f:
-            data = json.load(f)
-        return f"Number of documents: {len(data)}\nFirst document title: {data[0]['title']}"
-    else:
         return "Invalid study name"
-def update_interface(study_name):
     study_info = get_study_info(study_name)
     questions = sample_questions.get(study_name, [])[:3]
-    return (
-        study_info,
-        *[gr.update(visible=True, value=q) for q in questions],
-        *[gr.update(visible=False) for _ in range(3 - len(questions))],
-    )
-def set_question(question):
-    return question
-with gr.Blocks() as demo:
-    gr.Markdown("# ACRES RAG Platform")
-    with gr.Row():
-        with gr.Column(scale=2):
-            chatbot = gr.Chatbot(elem_id="chatbot", show_label=False, height=400)
-            with gr.Row():
-                msg = gr.Textbox(
                     show_label=False,
-                    placeholder="Type your message here...",
-                    scale=4,
-                    lines=1,
-                    autofocus=True,
                 )
-                send_btn = gr.Button("Send", scale=1)
-            with gr.Accordion("Sample Questions", open=False):
-                sample_btn1 = gr.Button("Sample Question 1", visible=False)
-                sample_btn2 = gr.Button("Sample Question 2", visible=False)
-                sample_btn3 = gr.Button("Sample Question 3", visible=False)
-        with gr.Column(scale=1):
-            gr.Markdown("### Study Information")
-            study_dropdown = gr.Dropdown(
-                choices=list(STUDY_FILES.keys()),
-                label="Select Study",
-                value=list(STUDY_FILES.keys())[0],
             )
-            study_info = gr.Textbox(label="Study Details", lines=4)
-            gr.Markdown("### Settings")
-            prompt_type = gr.Radio(
-                ["Default", "Highlight", "Evidence-based"],
-                label="Prompt Type",
-                value="Default",
             )
-            clear = gr.Button("Clear Chat")
-    def user(user_message, history):
-        if not user_message.strip():
-            return "", history  # Return unchanged if the message is empty
-        return "", history + [[user_message, None]]
-    def bot(history, study_name, prompt_type):
-        if not history:
-            return history
-        user_message = history[-1][0]
-        bot_message = chat_function(user_message, history, study_name, prompt_type)
-        history[-1][1] = bot_message
-        return history
-    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, [chatbot, study_dropdown, prompt_type], chatbot
-    )
-    send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, [chatbot, study_dropdown, prompt_type], chatbot
-    )
-    clear.click(lambda: None, None, chatbot, queue=False)
-    study_dropdown.change(
-        fn=update_interface,
-        inputs=study_dropdown,
-        outputs=[study_info, sample_btn1, sample_btn2, sample_btn3],
-    )
-    sample_btn1.click(set_question, inputs=[sample_btn1], outputs=[msg])
-    sample_btn2.click(set_question, inputs=[sample_btn2], outputs=[msg])
-    sample_btn3.click(set_question, inputs=[sample_btn3], outputs=[msg])
 if __name__ == "__main__":
     demo.launch(share=True, debug=True)

+import json
+from typing import List, Tuple
 import gradio as gr
 from config import STUDY_FILES
+from rag.rag_pipeline import RAGPipeline
+from utils.helpers import generate_follow_up_questions
+from utils.prompts import (
+    highlight_prompt,
+    evidence_based_prompt,
+    sample_questions,
+)
 rag_cache = {}
+def get_rag_pipeline(study_name: str) -> RAGPipeline:
+    """Get or create a RAGPipeline instance for the given study."""
     if study_name not in rag_cache:
         study_file = STUDY_FILES.get(study_name)
+        if not study_file:
             raise ValueError(f"Invalid study name: {study_name}")
+        rag_cache[study_name] = RAGPipeline(study_file)
     return rag_cache[study_name]
+def chat_function(
+    message: str, history: List[List[str]], study_name: str, prompt_type: str
+) -> str:
+    """Process a chat message and generate a response using the RAG pipeline."""
     if not message.strip():
         return "Please enter a valid query."
     rag = get_rag_pipeline(study_name)
+    prompt = {
+        "Highlight": highlight_prompt,
+        "Evidence-based": evidence_based_prompt,
+    }.get(prompt_type)
     response = rag.query(message, prompt_template=prompt)
     return response.response
+def get_study_info(study_name: str) -> str:
+    """Retrieve information about the specified study."""
     study_file = STUDY_FILES.get(study_name)
+    if not study_file:
         return "Invalid study name"
+    with open(study_file, "r") as f:
+        data = json.load(f)
+    return f"### Number of documents: {len(data)}"
+def update_interface(study_name: str) -> Tuple[str, gr.update, gr.update, gr.update]:
+    """Update the interface based on the selected study."""
     study_info = get_study_info(study_name)
     questions = sample_questions.get(study_name, [])[:3]
+    visible_questions = [gr.update(visible=True, value=q) for q in questions]
+    hidden_questions = [gr.update(visible=False) for _ in range(3 - len(questions))]
+    return (study_info, *visible_questions, *hidden_questions)
+def set_question(question: str) -> str:
+    return question.lstrip("✨ ")
+def create_gr_interface() -> gr.Blocks:
+    """
+    Create and configure the Gradio interface for the RAG platform.
+    This function sets up the entire user interface, including:
+    - Chat interface with message input and display
+    - Study selection dropdown
+    - Sample and follow-up question buttons
+    - Prompt type selection
+    - Event handlers for user interactions
+    Returns:
+        gr.Blocks: The configured Gradio interface ready for launching.
+    """
+    with gr.Blocks() as demo:
+        gr.Markdown("# ACRES RAG Platform")
+        with gr.Row():
+            with gr.Column(scale=2):
+                chatbot = gr.Chatbot(
+                    elem_id="chatbot",
                     show_label=False,
+                    height=600,
+                    container=False,
+                    show_copy_button=False,
+                    layout="bubble",
+                    visible=True,
+                )
+                with gr.Row():
+                    msg = gr.Textbox(
+                        show_label=False,
+                        placeholder="Type your message here...",
+                        scale=4,
+                        lines=1,
+                        autofocus=True,
+                    )
+                    send_btn = gr.Button("Send", scale=1)
+            with gr.Column(scale=1):
+                gr.Markdown("### Study Information")
+                study_dropdown = gr.Dropdown(
+                    choices=list(STUDY_FILES.keys()),
+                    label="Select Study",
+                    value=list(STUDY_FILES.keys())[0],
                 )
+                study_info = gr.Markdown(label="Study Details")
+                with gr.Accordion("Sample Questions", open=False):
+                    sample_btns = [
+                        gr.Button(f"Sample Question {i+1}", visible=False)
+                        for i in range(3)
+                    ]
+                gr.Markdown("### ✨ Generated Questions")
+                with gr.Row():
+                    follow_up_btns = [
+                        gr.Button(f"Follow-up {i+1}", visible=False) for i in range(3)
+                    ]
+                gr.Markdown("### Settings")
+                prompt_type = gr.Radio(
+                    ["Default", "Highlight", "Evidence-based"],
+                    label="Prompt Type",
+                    value="Default",
+                )
+                clear = gr.Button("Clear Chat")
+        def user(
+            user_message: str, history: List[List[str]]
+        ) -> Tuple[str, List[List[str]]]:
+            return "", (
+                history + [[user_message, None]] if user_message.strip() else history
             )
+        def bot(
+            history: List[List[str]], study_name: str, prompt_type: str
+        ) -> Tuple[List[List[str]], gr.update, gr.update, gr.update]:
+            """
+            Generate bot response and update the interface.
+            This function:
+            1. Processes the latest user message
+            2. Generates a response using the RAG pipeline
+            3. Updates the chat history
+            4. Generates follow-up questions
+            5. Prepares interface updates for follow-up buttons
+            Args:
+                history (List[List[str]]): The current chat history.
+                study_name (str): The name of the current study.
+                prompt_type (str): The type of prompt being used.
+            Returns:
+                Tuple[List[List[str]], gr.update, gr.update, gr.update]:
+                Updated chat history and interface components for follow-up questions.
+            """
+            if not history:
+                return history, [], [], []
+            user_message = history[-1][0]
+            bot_message = chat_function(user_message, history, study_name, prompt_type)
+            history[-1][1] = bot_message
+            rag = get_rag_pipeline(study_name)
+            follow_up_questions = generate_follow_up_questions(
+                rag, bot_message, user_message, study_name
             )
+            visible_questions = [
+                gr.update(visible=True, value=q) for q in follow_up_questions
+            ]
+            hidden_questions = [
+                gr.update(visible=False) for _ in range(3 - len(follow_up_questions))
+            ]
+            return (history, *visible_questions, *hidden_questions)
+        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+            bot,
+            [chatbot, study_dropdown, prompt_type],
+            [chatbot, *follow_up_btns],
+        )
+        send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+            bot,
+            [chatbot, study_dropdown, prompt_type],
+            [chatbot, *follow_up_btns],
+        )
+        for btn in follow_up_btns + sample_btns:
+            btn.click(set_question, inputs=[btn], outputs=[msg])
+        clear.click(lambda: None, None, chatbot, queue=False)
+        study_dropdown.change(
+            fn=update_interface,
+            inputs=study_dropdown,
+            outputs=[study_info, *sample_btns],
+        )
+    return demo
+demo = create_gr_interface()
 if __name__ == "__main__":
+    # demo = create_gr_interface()
     demo.launch(share=True, debug=True)

utils/__pycache__/prompts.cpython-311.pyc CHANGED Viewed

Binary files a/utils/__pycache__/prompts.cpython-311.pyc and b/utils/__pycache__/prompts.cpython-311.pyc differ

utils/helpers.py CHANGED Viewed

@@ -1,42 +1,79 @@
 from typing import Dict, Any
 from llama_index.core import Response
-def process_response(response: Response) -> Dict[str, Any]:
-    source_nodes = response.source_nodes
-    sources = {}
-    for i, node in enumerate(source_nodes, 1):
-        source = format_source(node.metadata)
-        if source not in sources.values():
-            sources[i] = source
-    markdown_text = response.response + "\n\n### Sources\n\n"
-    raw_text = response.response + "\n\nSources:\n"
-    for i, source in sources.items():
-        markdown_text += f"{i}. {source}\n"
-        raw_text += f"[{i}] {source}\n"
-    return {"markdown": markdown_text, "raw": raw_text, "sources": sources}
-def format_source(metadata: Dict[str, Any]) -> str:
-    authors = metadata.get("authors", "Unknown Author")
-    year = metadata.get("year", "n.d.")
-    title = metadata.get("title", "Untitled")
-    author_list = authors.split(",")
-    if len(author_list) > 2:
-        formatted_authors = f"{author_list[0].strip()} et al."
-    elif len(author_list) == 2:
-        formatted_authors = f"{author_list[0].strip()} and {author_list[1].strip()}"
     else:
-        formatted_authors = author_list[0].strip()
-    year = "n.d." if year is None or year == "None" else str(year)
-    max_title_length = 250
-    if len(title) > max_title_length:
-        title = title[:max_title_length] + "..."
-    return f"{formatted_authors} ({year}). {title}"

 from typing import Dict, Any
 from llama_index.core import Response
+from typing import List
+from rag.rag_pipeline import RAGPipeline
+from utils.prompts import (
+    structured_follow_up_prompt,
+    VaccineCoverageVariables,
+    StudyCharacteristics,
+)
+def generate_follow_up_questions(
+    rag: RAGPipeline, response: str, query: str, study_name: str
+) -> List[str]:
+    """
+    Generates follow-up questions based on the given RAGPipeline, response, query, and study_name.
+    Args:
+        rag (RAGPipeline): The RAGPipeline object used for generating follow-up questions.
+        response (str): The response to the initial query.
+        query (str): The initial query.
+        study_name (str): The name of the study.
+    Returns:
+        List[str]: A list of generated follow-up questions.
+    Raises:
+        None
+    """
+    # Determine the study type based on the study_name
+    if "Vaccine Coverage" in study_name:
+        study_type = "Vaccine Coverage"
+        key_variables = list(VaccineCoverageVariables.__annotations__.keys())
+    elif "Ebola Virus" in study_name:
+        study_type = "Ebola Virus"
+        key_variables = [
+            "SAMPLE_SIZE",
+            "PLASMA_TYPE",
+            "DOSAGE",
+            "FREQUENCY",
+            "SIDE_EFFECTS",
+            "VIRAL_LOAD_CHANGE",
+            "SURVIVAL_RATE",
+        ]
+    elif "Gene Xpert" in study_name:
+        study_type = "Gene Xpert"
+        key_variables = [
+            "OBJECTIVE",
+            "OUTCOME_MEASURES",
+            "SENSITIVITY",
+            "SPECIFICITY",
+            "COST_COMPARISON",
+            "TURNAROUND_TIME",
+        ]
     else:
+        study_type = "General"
+        key_variables = list(StudyCharacteristics.__annotations__.keys())
+    # Add key variables to the context
+    context = f"Study type: {study_type}\nKey variables to consider: {', '.join(key_variables)}\n\n{response}"
+    follow_up_response = rag.query(
+        structured_follow_up_prompt.format(
+            context_str=context,
+            query_str=query,
+            response_str=response,
+            study_type=study_type,
+        )
+    )
+    questions = follow_up_response.response.strip().split("\n")
+    cleaned_questions = []
+    for q in questions:
+        # Remove leading numbers and periods, and strip whitespace
+        cleaned_q = q.split(". ", 1)[-1].strip()
+        # Ensure the question ends with a question mark
+        if cleaned_q and not cleaned_q.endswith("?"):
+            cleaned_q += "?"
+        if cleaned_q:
+            cleaned_questions.append(f"✨ {cleaned_q}")
+    return cleaned_questions[:3]

utils/prompts.py CHANGED Viewed

@@ -116,3 +116,20 @@ evidence_based_prompt = PromptTemplate(
     "If you're unsure about a source, use [?]. "
     "Ensure that EVERY statement from the context is properly cited."
 )

     "If you're unsure about a source, use [?]. "
     "Ensure that EVERY statement from the context is properly cited."
 )
+structured_follow_up_prompt = PromptTemplate(
+    "Context information is below.\n"
+    "---------------------\n"
+    "{context_str}\n"
+    "---------------------\n"
+    "Original question: {query_str}\n"
+    "Response: {response_str}\n"
+    "Study type: {study_type}\n"
+    "Based on the above information and the study type, generate 3 follow-up questions that help extract key variables or information from the study. "
+    "Focus on the following aspects:\n"
+    "1. Any missing key variables that are typically reported in this type of study.\n"
+    "2. Clarification on methodology or results that might affect the interpretation of the study.\n"
+    "3. Potential implications or applications of the study findings.\n"
+    "Ensure each question is specific, relevant to the study type, and ends with a question mark."
+)