import re
import string
from typing import List


def normalize_item(item) -> str:
    """Return a canonical form of ``item`` for loose text comparison.

    The transformations are applied in this order:

    1. Lowercase the text.
    2. Delete every character in ``string.punctuation`` (ASCII punctuation
       only). Characters are deleted, not replaced by a space, so
       ``"well-known"`` becomes ``"wellknown"``.
    3. Replace the standalone words ``a``, ``an`` and ``the`` with a space.
    4. Collapse runs of whitespace to a single space and trim both ends.
    """
    punctuation_table = str.maketrans('', '', string.punctuation)
    without_punctuation = item.lower().translate(punctuation_table)
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punctuation)
    return ' '.join(without_articles.split())


def remove_duplicates(items: List[str]) -> List[str]:
    """Drop items whose normalized form has already been seen.

    Two items count as duplicates when ``normalize_item`` returns the same
    string for both. The first occurrence is kept in its original
    (un-normalized) form, and the relative order of kept items is preserved.

    Args:
        items: Strings to de-duplicate.

    Returns:
        A new list holding the first occurrence of each distinct normalized
        value; the input list is not modified.
    """
    # A set gives O(1) membership checks; the list used previously made the
    # whole loop quadratic in the number of items.
    seen_keys = set()
    unique_items = []

    for item in items:
        key = normalize_item(item)
        if key in seen_keys:
            continue
        seen_keys.add(key)
        unique_items.append(item)

    return unique_items
    
def remove_distractors_duplicate_with_correct_answer(correct: str, distractors: List[str]) -> List[str]:
    """Return the distractors that do not match the correct answer.

    A distractor is dropped when ``normalize_item`` gives the same result for
    it as for ``correct``. Surviving distractors keep their original text and
    order; the input list is not modified.
    """
    target = normalize_item(correct)
    return [
        candidate
        for candidate in distractors
        if normalize_item(candidate) != target
    ]

def clean_text(text: str) -> str:
    """Strip bracketed asides and tidy spacing and dashes in ``text``.

    In order: remove ``(...)`` spans, remove ``[...]`` spans, collapse runs
    of spaces into one space, and replace en dashes with ASCII hyphens.
    Bracket matching is non-greedy and does not cross newlines. The result
    is not trimmed, so a leading or trailing space may remain.
    """
    result = text

    # Parenthesized spans first, then square-bracketed ones.
    for bracket_pattern in (r"\((.*?)\)", r"\[(.*?)\]"):
        result = re.sub(bracket_pattern, "", result)

    # Removing a span can leave adjacent spaces behind; squeeze them.
    result = re.sub(" +", " ", result)

    # Normalize the en dash to a plain hyphen.
    return result.replace('–', '-')