KB-VQA

Running

App Files Files Community

m7mdal7aj committed on Jan 15

Commit

e68dc65

•

1 Parent(s): 58b21d4

Upload 2 files

Browse files

Completed the detection code and updated some utility functions.

Files changed (2) hide show

My_Model/object_detection.py +162 -0
My_Model/utilities.py +277 -0

My_Model/object_detection.py ADDED Viewed

	@@ -0,0 +1,162 @@

+import torch
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+import cv2
+import os
+from utilities import get_path, show_image, show_image_with_matplotlib
+import transformers
+class ObjectDetector:
+    def __init__(self):
+        self.model = None
+        self.processor = None
+        self.model_name = None
+    def load_model(self, model_name='detic', pretrained=True, model_version='yolov5s'):
+        """
+        Load the specified object detection model.
+        :param model_name: Name of the model to load.
+        :param pretrained: Boolean indicating if pretrained model should be used.
+        :param model_version: Version of the model, applicable for YOLOv5.
+        """
+        self.model_name = model_name
+        if model_name == 'detic':
+            self.load_detic_model(pretrained)
+        elif model_name == 'yolov5':
+            self.load_yolov5_model(pretrained, model_version)
+        else:
+            raise ValueError("Unsupported model name")
+    def load_detic_model(self, pretrained):
+        """Load the Detic model."""
+        try:
+            model_path = get_path('deformable-detr-detic', 'Models')
+            from transformers import AutoImageProcessor, AutoModelForObjectDetection
+            self.processor = AutoImageProcessor.from_pretrained(model_path)
+            self.model = AutoModelForObjectDetection.from_pretrained(model_path)
+        except Exception as e:
+            print(f"Error loading Detic model: {e}")
+    def load_yolov5_model(self, pretrained, model_version):
+        """Load the YOLOv5 model."""
+        try:
+            model_path = get_path('yolov5', 'Models')
+            if model_path and os.path.exists(model_path):
+                with os.scandir(model_path) as main_dir:
+                    self.model = torch.hub.load(model_path, model_version, pretrained=pretrained, source="local")
+            else:
+                self.model = torch.hub.load('ultralytics/yolov5', model_version, pretrained=pretrained)
+        except Exception as e:
+            print(f"Error loading YOLOv5 model: {e}")
+    def process_image(self, image_path: str) -> Image.Image:
+        """
+        Process the image from the given path.
+        :param image_path: Path to the image file.
+        :return: Processed image.
+        """
+        with Image.open(image_path) as image:
+            return image.convert("RGB")
+    def detect_objects(self, image: Image.Image, threshold: float = 0.4):
+        """
+        Detect objects in the given image.
+        :param image: Image in which to detect objects.
+        :param threshold: Detection threshold.
+        :return: Tuple of detected objects string and list.
+        """
+        detected_objects_str, detected_objects_list = "", []
+        if self.model_name == 'detic':
+            detected_objects_str, detected_objects_list = self.detect_with_detic(image, threshold)
+        elif self.model_name == 'yolov5':
+            detected_objects_str, detected_objects_list = self.detect_with_yolov5(image, threshold)
+        return detected_objects_str.strip(), detected_objects_list
+    def detect_with_detic(self, image: Image.Image, threshold: float):
+        """Detect objects using Detic model."""
+        inputs = self.processor(images=image, return_tensors="pt")
+        outputs = self.model(**inputs)
+        target_sizes = torch.tensor([image.size[::-1]])
+        results = self.processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=threshold)[
+            0]
+        detected_objects_str = ""
+        detected_objects_list = []
+        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+            if score >= threshold:
+                label_name = self.model.config.id2label[label.item()]
+                box_rounded = [round(coord, 2) for coord in box.tolist()]
+                certainty = round(score.item() * 100, 2)
+                detected_objects_str += f"{{object: {label_name}, bounding box: {box_rounded}, certainty: {certainty}%}}\n"
+                detected_objects_list.append((label_name, box_rounded, certainty))
+        return detected_objects_str, detected_objects_list
+    def detect_with_yolov5(self, image: Image.Image, threshold: float):
+        """Detect objects using YOLOv5 model."""
+        cv2_img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+        results = self.model(cv2_img)
+        detected_objects_str = ""
+        detected_objects_list = []
+        for *bbox, conf, cls in results.xyxy[0]:
+            if conf >= threshold:
+                label_name = results.names[int(cls)]
+                box_rounded = [round(coord.item(), 2) for coord in bbox]  # Convert each tensor to float and round
+                certainty = round(conf.item() * 100, 2)
+                detected_objects_str += f"{{object: {label_name}, bounding box: {box_rounded}, certainty: {certainty}%}}\n"
+                detected_objects_list.append((label_name, box_rounded, certainty))
+        return detected_objects_str, detected_objects_list
+    def draw_boxes(self, image: Image.Image, detected_objects: list, show_confidence: bool = True) -> Image.Image:
+        """
+        Draw bounding boxes around detected objects in the image.
+        :param image: Image on which to draw.
+        :param detected_objects: List of detected objects.
+        :param show_confidence: Boolean to show confidence scores.
+        :return: Image with drawn boxes.
+        """
+        draw = ImageDraw.Draw(image)
+        try:
+            font = ImageFont.truetype("arial.ttf", 15)
+        except IOError:
+            font = ImageFont.load_default()
+        colors = ["red", "green", "blue", "yellow", "purple", "orange"]
+        label_color_map = {}
+        for label_name, box, score in detected_objects:
+            if label_name not in label_color_map:
+                label_color_map[label_name] = colors[len(label_color_map) % len(colors)]
+            color = label_color_map[label_name]
+            draw.rectangle(box, outline=color, width=3)
+            label_text = f"{label_name}"
+            if show_confidence:
+                label_text += f" ({round(score, 2)}%)"
+            draw.text((box[0], box[1]), label_text, fill=color, font=font)
+        return image
+if __name__=="__main__":
+    detector = ObjectDetector()
+    image_path = get_path('horse.jpg', 'Sample_Images')
+    detector.load_model('yolov5')  # pass either 'detic' or 'yolov5'
+    image = detector.process_image(image_path)
+    detected_objects_string, detected_objects_list = detector.detect_objects(image, threshold=0.2)
+    image_with_boxes = detector.draw_boxes(image, detected_objects_list, show_confidence=False)
+    print(detected_objects_string)
+    show_image(image_with_boxes)
+    #show_image_with_matplotlib(image_path)

My_Model/utilities.py ADDED Viewed

	@@ -0,0 +1,277 @@

+import pandas as pd
+from collections import Counter
+import json
+import os
+import IPython.display
+from PIL import Image
+import numpy as np
+import torch
+from IPython import get_ipython
+import sys
+class VQADataProcessor:
+    """
+    A class to process OKVQA dataset.
+    Attributes:
+        questions_file_path (str): The file path for the questions JSON file.
+        annotations_file_path (str): The file path for the annotations JSON file.
+        questions (list): List of questions extracted from the JSON file.
+        annotations (list): List of annotations extracted from the JSON file.
+        df_questions (DataFrame): DataFrame created from the questions list.
+        df_answers (DataFrame): DataFrame created from the annotations list.
+        merged_df (DataFrame): DataFrame resulting from merging questions and answers.
+    """
+    def __init__(self, questions_file_path, annotations_file_path):
+        """
+        Initializes the VQADataProcessor with file paths for questions and annotations.
+        Parameters:
+            questions_file_path (str): The file path for the questions JSON file.
+            annotations_file_path (str): The file path for the annotations JSON file.
+        """
+        self.questions_file_path = questions_file_path
+        self.annotations_file_path = annotations_file_path
+        self.questions, self.annotations = self.read_json_files()
+        self.df_questions = pd.DataFrame(self.questions)
+        self.df_answers = pd.DataFrame(self.annotations)
+        self.merged_df = None
+    def read_json_files(self):
+        """
+        Reads the JSON files for questions and annotations.
+        Returns:
+            tuple: A tuple containing two lists: questions and annotations.
+        """
+        with open(self.questions_file_path, 'r') as file:
+            data = json.load(file)
+            questions = data['questions']
+        with open(self.annotations_file_path, 'r') as file:
+            data = json.load(file)
+            annotations = data['annotations']
+        return questions, annotations
+    @staticmethod
+    def find_most_frequent(my_list):
+        """
+        Finds the most frequent item in a list.
+        Parameters:
+            my_list (list): A list of items.
+        Returns:
+            The most frequent item in the list. Returns None if the list is empty.
+        """
+        if not my_list:
+            return None
+        counter = Counter(my_list)
+        most_common = counter.most_common(1)
+        return most_common[0][0]
+    def merge_dataframes(self):
+        """
+        Merges the questions and answers DataFrames on 'question_id' and 'image_id'.
+        """
+        self.merged_df = pd.merge(self.df_questions, self.df_answers, on=['question_id', 'image_id'])
+    def join_words_with_hyphen(self, sentence):
+        return '-'.join(sentence.split())
+    def process_answers(self):
+        """
+        Processes the answers by extracting raw and processed answers and finding the most frequent ones.
+        """
+        if self.merged_df is not None:
+            self.merged_df['raw_answers'] = self.merged_df['answers'].apply(lambda x: [ans['raw_answer'] for ans in x])
+            self.merged_df['processed_answers'] = self.merged_df['answers'].apply(
+                lambda x: [ans['answer'] for ans in x])
+            self.merged_df['most_frequent_raw_answer'] = self.merged_df['raw_answers'].apply(self.find_most_frequent)
+            self.merged_df['most_frequent_processed_answer'] = self.merged_df['processed_answers'].apply(
+                self.find_most_frequent)
+            self.merged_df.drop(columns=['answers'], inplace=True)
+        else:
+            print("DataFrames have not been merged yet.")
+        # Apply the function to the 'most_frequent_processed_answer' column
+        self.merged_df['single_word_answers'] = self.merged_df['most_frequent_processed_answer'].apply(
+            self.join_words_with_hyphen)
+    def get_processed_data(self):
+        """
+        Retrieves the processed DataFrame.
+        Returns:
+            DataFrame: The processed DataFrame. Returns None if the DataFrame is empty or not processed.
+        """
+        if self.merged_df is not None:
+            return self.merged_df
+        else:
+            print("DataFrame is empty or not processed yet.")
+            return None
+    def save_to_csv(self, df, saved_file_name):
+        if saved_file_name is not None:
+            if ".csv" not in saved_file_name:
+                df.to_csv(os.path.join(saved_file_name, ".csv"), index=None)
+            else:
+                df.to_csv(saved_file_name, index=None)
+        else:
+            df.to_csv("data.csv", index=None)
+    def display_dataframe(self):
+        """
+        Displays the processed DataFrame.
+        """
+        if self.merged_df is not None:
+            print(self.merged_df)
+        else:
+            print("DataFrame is empty.")
+def process_okvqa_dataset(questions_file_path, annotations_file_path, save_to_csv=False, saved_file_name=None):
+    """
+    Processes the OK-VQA dataset given the file paths for questions and annotations.
+    Parameters:
+        questions_file_path (str): The file path for the questions JSON file.
+        annotations_file_path (str): The file path for the annotations JSON file.
+    Returns:
+        DataFrame: The processed DataFrame containing merged and processed VQA data.
+    """
+    # Create an instance of the class
+    processor = VQADataProcessor(questions_file_path, annotations_file_path)
+    # Process the data
+    processor.merge_dataframes()
+    processor.process_answers()
+    # Retrieve the processed DataFrame
+    processed_data = processor.get_processed_data()
+    if save_to_csv:
+        processor.save_to_csv(processed_data, saved_file_name)
+    return processed_data
+def show_image(image):
+    """
+    Display an image in various environments (Jupyter, PyCharm, Hugging Face Spaces).
+    Handles different types of image inputs (file path, PIL Image, numpy array, OpenCV, PyTorch tensor).
+    Args:
+    image (str or PIL.Image or numpy.ndarray or torch.Tensor): The image to display.
+    """
+    in_jupyter = is_jupyter_notebook()
+    # Convert image to PIL Image if it's a file path, numpy array, or PyTorch tensor
+    if isinstance(image, str):
+        if os.path.isfile(image):
+            image = Image.open(image)
+        else:
+            raise ValueError("File path provided does not exist.")
+    elif isinstance(image, np.ndarray):
+        if image.ndim == 3 and image.shape[2] in [3, 4]:
+            image = Image.fromarray(image[..., ::-1] if image.shape[2] == 3 else image)
+        else:
+            image = Image.fromarray(image)
+    elif torch.is_tensor(image):
+        image = Image.fromarray(image.permute(1, 2, 0).numpy().astype(np.uint8))
+    # Display the image
+    if in_jupyter:
+        from IPython.display import display
+        display(image)
+    else:
+        image.show()
+import matplotlib.pyplot as plt
+def show_image_with_matplotlib(image):
+    if isinstance(image, str):
+        image = Image.open(image)
+    elif isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    elif torch.is_tensor(image):
+        image = Image.fromarray(image.permute(1, 2, 0).numpy().astype(np.uint8))
+    plt.imshow(image)
+    plt.axis('off')  # Turn off axis numbers
+    plt.show()
+def is_jupyter_notebook():
+    """
+    Check if the code is running in a Jupyter notebook.
+    Returns:
+        bool: True if running in a Jupyter notebook, False otherwise.
+    """
+    try:
+        from IPython import get_ipython
+        if 'IPKernelApp' not in get_ipython().config:
+            return False
+        if 'ipykernel' in str(type(get_ipython())):
+            return True  # Running in Jupyter Notebook
+    except (NameError, AttributeError):
+        return False  # Not running in Jupyter Notebook
+    return False  # Default to False if none of the above conditions are met
+def is_pycharm():
+    return 'PYCHARM_HOSTED' in os.environ
+def is_google_colab():
+    return 'COLAB_GPU' in os.environ or 'google.colab' in sys.modules
+def get_path(name, path_type):
+    """
+    Generates a path for models, images, or data based on the specified type.
+    Args:
+    name (str): The name of the model, image, or data folder/file.
+    path_type (str): The type of path needed ('models', 'images', or 'data').
+    Returns:
+    str: The full path to the specified resource.
+    """
+    # Get the current working directory (assumed to be inside 'code' folder)
+    current_dir = os.getcwd()
+    # Get the directory one level up (the parent directory)
+    parent_dir = os.path.dirname(current_dir)
+    # Construct the path to the specified folder
+    folder_path = os.path.join(parent_dir, path_type)
+    # Construct the full path to the specific resource
+    full_path = os.path.join(folder_path, name)
+    return full_path
+if __name__ == "__main__":
+    # Running this module directly does nothing. The commented-out calls below show how
+    # process_okvqa_dataset is invoked for the validation and training question/annotation files.
+    pass
+    #val_data = process_okvqa_dataset('OpenEnded_mscoco_val2014_questions.json', 'mscoco_val2014_annotations.json', save_to_csv=True, saved_file_name="okvqa_val.csv")
+    #train_data = process_okvqa_dataset('OpenEnded_mscoco_train2014_questions.json', 'mscoco_train2014_annotations.json', save_to_csv=True, saved_file_name="okvqa_train.csv")