m7mdal7aj committed on
Commit
f6a1c31
·
verified ·
1 Parent(s): 977f4fb

Update my_model/tabs/dataset_analysis.py

Browse files
Files changed (1) hide show
  1. my_model/tabs/dataset_analysis.py +24 -20
my_model/tabs/dataset_analysis.py CHANGED
@@ -246,33 +246,37 @@ class OKVQADatasetAnalyzer:
246
 
247
 
248
 
249
- def run_dataset_analyzer():
250
-
 
 
 
 
 
 
 
251
  datasets_comparison_table = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="VQA Datasets Comparison")
252
  okvqa_dataset_characteristics = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="OK-VQA Dataset Characteristics")
253
-
254
- val_data = process_okvqa_dataset(config.DATASET_VAL_QUESTIONS_PATH, config.DATASET_VAL_ANNOTATIONS_PATH,
255
- save_to_csv=False)
256
- train_data = process_okvqa_dataset(config.DATASET_TRAIN_QUESTIONS_PATH, config.DATASET_TRAIN_ANNOTATIONS_PATH ,
257
- save_to_csv=False)
258
 
 
 
 
259
 
 
 
260
 
261
- dataset_analyzer = OKVQADatasetAnalyzer(config.DATASET_TRAIN_QUESTIONS_PATH,
262
- config.DATASET_VAL_QUESTIONS_PATH, 'train_test')
263
-
264
  with st.container():
265
  st.markdown("## Overview of KB-VQA Datasets")
266
  col1, col2 = st.columns([2, 1])
267
  with col1:
268
  st.write(" ")
269
  with st.expander("1 - Knowledge-Based VQA (KB-VQA)"):
270
- st.markdown(""" [Knowledge-Based VQA (KB-VQA)](https://arxiv.org/abs/1511.02570): One of the earliest
271
- datasets in this domain, KB-VQA comprises 700 images and 2,402 questions, with each
272
- question associated with both an image and a knowledge base (KB). The KB encapsulates
273
- facts about the world, including object names, properties, and relationships, aiming to
274
- foster models capable of answering questions through reasoning over both the image
275
- and the KB.\n""")
276
  with st.expander("2 - Factual VQA (FVQA)"):
277
  st.markdown(""" [Factual VQA (FVQA)](https://arxiv.org/abs/1606.05433): This dataset includes 2,190
278
  images and 5,826 questions, accompanied by a knowledge base containing 193,449 facts.
@@ -296,6 +300,8 @@ def run_dataset_analyzer():
296
  st.markdown("#### KB-VQA Datasets Comparison")
297
  st.write(datasets_comparison_table, use_column_width=True)
298
  st.write("-----------------------")
 
 
299
  with st.container():
300
  st.write("\n" * 10)
301
  st.markdown("## OK-VQA Dataset")
@@ -307,16 +313,14 @@ def run_dataset_analyzer():
307
  with st.expander("Questions Distribution over Knowledge Category"):
308
  df = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="Question Category Dist")
309
  st.markdown("#### Questions Distribution over Knowledge Category")
310
- dataset_analyzer.plot_bar_chart(df, "Knowledge Category", "Percentage", "Questions Distribution over "
311
- "Knowledge Category")
312
 
313
  with st.expander("Distribution of Question Keywords"):
314
-
315
- #with st.expander("Distribution of Question Keywords"):
316
  dataset_analyzer.categorize_questions()
317
  st.markdown("#### Distribution of Question Keywords")
318
  dataset_analyzer.plot_question_distribution()
319
 
 
320
  with st.container():
321
  with st.expander("Show Dataset Samples"):
322
  st.write(train_data[:10])
 
246
 
247
 
248
 
249
+
250
+ def run_dataset_analyzer() -> None:
251
+ """
252
+ Executes the dataset analysis process and displays the results using Streamlit.
253
+ This function provides an overview of the dataset and uses the OKVQADatasetAnalyzer to visualize
254
+ the data.
255
+ """
256
+
257
+ # Load datasets from Excel
258
  datasets_comparison_table = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="VQA Datasets Comparison")
259
  okvqa_dataset_characteristics = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="OK-VQA Dataset Characteristics")
 
 
 
 
 
260
 
261
+ # Process OK-VQA datasets for validation and training
262
+ val_data = process_okvqa_dataset(config.DATASET_VAL_QUESTIONS_PATH, config.DATASET_VAL_ANNOTATIONS_PATH, save_to_csv=False)
263
+ train_data = process_okvqa_dataset(config.DATASET_TRAIN_QUESTIONS_PATH, config.DATASET_TRAIN_ANNOTATIONS_PATH, save_to_csv=False)
264
 
265
+ # Initialize the dataset analyzer
266
+ dataset_analyzer = OKVQADatasetAnalyzer(config.DATASET_TRAIN_QUESTIONS_PATH, config.DATASET_VAL_QUESTIONS_PATH, 'train_test')
267
 
268
+ # Display KB-VQA datasets overview
 
 
269
  with st.container():
270
  st.markdown("## Overview of KB-VQA Datasets")
271
  col1, col2 = st.columns([2, 1])
272
  with col1:
273
  st.write(" ")
274
  with st.expander("1 - Knowledge-Based VQA (KB-VQA)"):
275
+ st.markdown(""" [Knowledge-Based VQA (KB-VQA)](https://arxiv.org/abs/1511.02570): One of the earliest datasets in this domain, KB-VQA
276
+ comprises 700 images and 2,402 questions, with each question associated with both an image
277
+ and a knowledge base (KB). The KB encapsulates facts about the world, including object
278
+ names, properties, and relationships, aiming to foster models capable of answering
279
+ questions through reasoning over both the image and the KB.\n""")
 
280
  with st.expander("2 - Factual VQA (FVQA)"):
281
  st.markdown(""" [Factual VQA (FVQA)](https://arxiv.org/abs/1606.05433): This dataset includes 2,190
282
  images and 5,826 questions, accompanied by a knowledge base containing 193,449 facts.
 
300
  st.markdown("#### KB-VQA Datasets Comparison")
301
  st.write(datasets_comparison_table, use_column_width=True)
302
  st.write("-----------------------")
303
+
304
+ # Display OK-VQA dataset details
305
  with st.container():
306
  st.write("\n" * 10)
307
  st.markdown("## OK-VQA Dataset")
 
313
  with st.expander("Questions Distribution over Knowledge Category"):
314
  df = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="Question Category Dist")
315
  st.markdown("#### Questions Distribution over Knowledge Category")
316
+ dataset_analyzer.plot_bar_chart(df, "Knowledge Category", "Percentage", "Questions Distribution over Knowledge Category")
 
317
 
318
  with st.expander("Distribution of Question Keywords"):
 
 
319
  dataset_analyzer.categorize_questions()
320
  st.markdown("#### Distribution of Question Keywords")
321
  dataset_analyzer.plot_question_distribution()
322
 
323
+ # Display sample data
324
  with st.container():
325
  with st.expander("Show Dataset Samples"):
326
  st.write(train_data[:10])