import re

def custom_tokenizer(text):
    # Lowercase the text for case-insensitive matching
    text = text.lower()
    # Remove punctuation (keep word characters and whitespace)
    text = re.sub(r'[^\w\s]', '', text)
    # Split the text into tokens on whitespace
    tokens = text.split()
    # Wrap the question in start/end special tokens;
    # '<s>' and '</s>' are assumed here, since the original
    # marker strings were lost in extraction
    tokens.insert(0, '<s>')
    tokens.append('</s>')
    return tokens
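
A quick sanity check of the tokenizer, assuming the `<s>`/`</s>` markers used above:

question = "What is the capital of France?"
print(custom_tokenizer(question))
# ['<s>', 'what', 'is', 'the', 'capital', 'of', 'france', '</s>']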