File size: 8,979 Bytes
38167d4
 
f0f4b86
78040a5
10892df
38167d4
10892df
ba3027e
 
38167d4
ba3027e
d6bb045
38167d4
 
 
 
 
 
 
 
 
 
10892df
 
 
8a5ab3e
38167d4
ba3027e
38167d4
 
8a5ab3e
38167d4
ba3027e
cb8b3fe
810a2b0
f28eb9c
38167d4
8a5ab3e
38167d4
 
ba3027e
8a5ab3e
38167d4
 
 
8a5ab3e
82d483e
ba3027e
8a5ab3e
838749f
58b59bc
838749f
a7a1433
9be91ec
a7a1433
 
 
 
 
e8caf0b
 
 
 
 
 
9be91ec
715429c
9be91ec
 
 
 
 
 
 
8a5ab3e
38167d4
ba3027e
8a5ab3e
38167d4
 
 
8a5ab3e
38167d4
ba3027e
e0f05ce
38167d4
78040a5
 
d8b5c4e
7bd9395
38167d4
8a5ab3e
38167d4
ba3027e
e0f05ce
cb8b3fe
e8eea71
e0f05ce
f0f4b86
 
38167d4
8a5ab3e
38167d4
ba3027e
e0f05ce
38167d4
 
ba3027e
38167d4
 
 
 
 
 
 
8a5ab3e
38167d4
ba3027e
8a5ab3e
38167d4
8a5ab3e
a7a1433
38167d4
8a5ab3e
38167d4
ba3027e
8a5ab3e
38167d4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import pandas as pd
import streamlit as st
from my_model.tabs.run_inference import InferenceRunner
from my_model.tabs.finetuning_evaluation import KBVQAEvaluator
from my_model.state_manager import StateManager

class UIManager():
    """Manages the user interface for the Streamlit application."""

    def __init__(self):
        """Builds the tab registry and initializes the application state.

        ``self.tabs`` maps each sidebar label to the bound method that renders
        that page; insertion order is the order the labels are listed in.
        Afterwards a ``StateManager`` is created and its ``initialize_state``
        method is called.
        """

        # Ordered (label, renderer) pairs; dict() preserves this order.
        page_renderers = (
            ("Home", self.display_home),
            ("Dataset Analysis", self.display_dataset_analysis),
            ("Finetuning and Evaluation Results", self.display_finetuning_evaluation),
            ("Run Inference", self.display_run_inference),
            ("Dissertation Report", self.display_dissertation_report),
            ("Code", self.display_code),
            ("More Pages will follow .. ", self.display_placeholder),
        )
        self.tabs = dict(page_renderers)

        # The manager instance is not kept; only its initialization call is needed here.
        StateManager().initialize_state()

    
    def add_tab(self, tab_name, display_function):
        """Adds a new tab to the UI."""
        self.tabs[tab_name] = display_function

    
    def display_sidebar(self):
        """Renders the navigation sidebar and reports the chosen tab.

        A "Navigation" title is written to the sidebar, followed by a radio
        selector listing every key of ``self.tabs``. The selector is disabled
        according to ``st.session_state['loading_in_progress']``.

        Returns:
            The value returned by ``st.sidebar.radio``.
        """

        st.sidebar.title("Navigation")

        page_names = list(self.tabs)
        navigation_locked = st.session_state['loading_in_progress']
        return st.sidebar.radio("Go to", page_names, disabled=navigation_locked)
        

    def display_selected_page(self, selection):
        """Displays the selected page based on user's choice."""
        
        if selection in self.tabs:
            self.tabs[selection]()

    
    def display_home(self):
        """Renders the Home page.

        Shows the project title and sub-header, then a two-column layout with
        a 3:1 width ratio: the left column holds the project introduction and
        abstract, the right column holds an image followed by the
        acknowledgements text.
        """

        st.title('MultiModal Learning for Visual Question Answering using World Knowledge')
        st.text('')
        st.header('(Knowledge-Based Visual Question Answering)')

        main_col, side_col = st.columns([3, 1])

        with main_col:
            # Three empty text elements act as vertical spacing above the introduction.
            for _ in range(3):
                st.text('')
            st.write("""\n\n\n\nThis is an interactive application developed to demonstrate my project as part of the dissertation for Masters degree in Artificial Intelligence at the [University of Bath](https://www.bath.ac.uk/). 
                        \n\n\nDeveloped by: [Mohammed H AlHaj](https://www.linkedin.com/in/m7mdal7aj) | Dissertation Supervisor: [Andreas Theophilou](https://researchportal.bath.ac.uk/en/persons/andreas-theophilou)
                        \n\n""")
            st.write("""Navigating the frontier of the Visual Turing Test, this research delves into multimodal learning to bridge the gap between visual perception and linguistic interpretation, a foundational challenge in artificial intelligence. It scrutinizes the integration of visual cognition and external knowledge, emphasizing the pivotal role of the Transformer model in enhancing language processing and supporting complex multimodal tasks.
                        This research explores the task of Knowledge-Based Visual Question Answering (KB-VQA), it examines the influence of Pre-Trained Large Language Models (PT-LLMs) and Pre-Trained Multimodal Models (PT-LMMs), which have transformed the machine learning landscape by utilizing expansive, pre-trained knowledge repositories to tackle complex tasks, thereby enhancing KB-VQA systems.
                        An examination of existing Knowledge-Based Visual Question Answering (KB-VQA) methodologies led to a refined approach that converts visual content into the linguistic domain, creating detailed captions and object enumerations. This process leverages the implicit knowledge and inferential capabilities of PT-LLMs. The research refines the fine-tuning of PT-LLMs by integrating specialized tokens, enhancing the models’ ability to interpret visual contexts. The research also reviews current image representation techniques and knowledge sources, advocating for the utilization of implicit knowledge in PT-LLMs, especially for tasks that do not require specialized expertise.
                        Rigorous ablation experiments conducted to assess the impact of various visual context elements on model performance, with a particular focus on the importance of image descriptions generated during the captioning phase. The study includes a comprehensive analysis of major KB-VQA datasets, specifically the OK-VQA corpus, and critically evaluates the metrics used, incorporating semantic evaluation with GPT-4 to align the assessment with practical application needs.
                        The evaluation results underscore the developed model’s competent and competitive performance. It achieves a VQA score of 63.57% under syntactic evaluation and excels with an Exact Match (EM) score of 68.36%. Further, semantic evaluations yield even more impressive outcomes, with VQA and EM scores of 71.09% and 72.55%, respectively. These results demonstrate that the model effectively applies reasoning over the visual context and successfully retrieves the necessary knowledge to answer visual questions.""")

        with side_col:
            st.image("Files/mm.jpeg")
            st.write("""I am profoundly grateful for the support and guidance I have received throughout the course of my dissertation. I would like to extend my deepest appreciation to the following individuals:
                        To my supervisor, Dr. Andreas Theophilou, whose expertise, and insightful guidance have been instrumental in the completion of this research. Your mentorship has not only profoundly shaped my work but also my future endeavours in the field of computer science.
                        Special mention must be made of my mentors at the University of Bath—Dr. Ben Ralph, Dr. Hongping Cai, and Dr. Nadejda Roubtsova. The wealth of knowledge and insights I have gained from you has been indispensable. Your unwavering dedication to academic excellence and steadfast support have been crucial in navigating my academic journey.
                        My colleagues deserve equal gratitude, for their camaraderie and collaborative spirit have not only made this journey feasible but also deeply enjoyable. The shared experiences and the challenges we have overcome together have been integral to my personal and professional growth.
                        Lastly, my heartfelt thanks are extended to my family, whose unyielding love and encouragement have been my steadfast anchor. Your belief in my abilities has consistently inspired me and bolstered my strength throughout this process.
                        This dissertation is not merely a reflection of my individual efforts but stands as a testament to the collective support and wisdom of each individual mentioned above. I am honoured and privileged to be part of such a supportive and enriching academic community.
                        """)
    
    def display_dataset_analysis(self):
        """Renders the Dataset Analysis page.

        Currently shows only the page title and a placeholder notice.
        """

        page_title = "OK-VQA Dataset Analysis"
        placeholder_notice = "This is a Place Holder until the contents are uploaded."

        st.title(page_title)
        st.write(placeholder_notice)

    
    def display_finetuning_evaluation(self):
        """Renders the Finetuning and Evaluation Results page.

        Writes the page title and a short description, then creates a
        ``KBVQAEvaluator`` and calls its ``run_evaluator`` method to render the
        rest of the page.
        """

        st.title("Finetuning and Evaluation Results")
        for line in (
            "This page demonstrates the fine-tuning and model evaluation results",
            "\n",
        ):
            st.write(line)

        KBVQAEvaluator().run_evaluator()

    
    def display_run_inference(self):
        """Renders the Run Inference page.

        Writes the page title and a usage note, then creates an
        ``InferenceRunner`` and calls its ``run_inference`` method to render the
        rest of the page.
        """

        st.title("Run Inference")

        usage_note = "Please note that this is not a general purpose model, it is specifically trained on [OK-VQA Dataset](https://okvqa.allenai.org/) and desgined to give short and direct answers to the given questions about the given image."
        st.write(usage_note)
        st.write("\n")

        InferenceRunner().run_inference()

    
    def display_dissertation_report(self):
        """Renders the Dissertation Report page with a PDF download button.

        The report is read from ``Files/Dissertation Report.pdf``. The file is
        opened in a ``with`` block so the handle is closed as soon as its bytes
        have been read, instead of leaving an open file object behind on every
        call. If the file cannot be opened or read, an error message is shown
        on the page and no download button is rendered.
        """

        st.title("Dissertation Report")
        st.write("Click the link below to view the PDF.")

        report_path = "Files/Dissertation Report.pdf"
        try:
            with open(report_path, "rb") as report_file:
                report_bytes = report_file.read()
        except OSError as err:
            # Covers a missing file, permission problems and other read failures.
            st.error(f"The dissertation report could not be loaded: {err}")
            return

        # NOTE(review): the download name ("example.pdf") and the generic MIME type
        # are kept exactly as before; confirm whether a descriptive file name and
        # "application/pdf" are wanted instead.
        st.download_button(
            label="Download PDF",
            data=report_bytes,
            file_name="example.pdf",
            mime="application/octet-stream",
        )

    
    def display_code(self):
        """Renders the Code page.

        Shows the page title, a one-line explanation, and a markdown link to the
        files page of the project's HuggingFace Space.
        """

        st.title("Code")

        intro_markdown = "You can view the code for this project on HuggingFace Space files page."
        link_markdown = "[View Code](https://huggingface.co/spaces/m7mdal7aj/Mohammed_Alhaj_KB-VQA/tree/main)"

        st.markdown(intro_markdown)
        st.markdown(link_markdown, unsafe_allow_html=True)

    
    def display_placeholder(self):
        """Renders a "Stay Tuned" placeholder page for content not yet available."""

        page_title = "Stay Tuned"
        placeholder_notice = "This is a Place Holder until the contents are uploaded."

        st.title(page_title)
        st.write(placeholder_notice)