# Spaces:
# Runtime error
# Runtime error
# These are manual mappings that are not caught by our stemming rules or that
# would be done incorrectly by our automatic stemming rule. In detail,
# the keys of the _MANUAL_MATCHES dict contain the original word and the values
# contain the transformation of the word expected by the OKVQA stemming rule.
# These manual rules were found by checking the `raw_answers` and the `answers`
# fields of the released OKVQA dataset and checking all things that were not
# properly mapped by our automatic rules. In particular, some of the mappings
# are sometimes constant, e.g. christmas -> christmas, which was incorrectly
# singularized by our inflection.singularize.
import re

import inflection
import nltk
from nltk.corpus.reader import VERB
_MANUAL_MATCHES = { | |
"police": "police", | |
"las": "las", | |
"vegas": "vegas", | |
"yes": "yes", | |
"jeans": "jean", | |
"hell's": "hell", | |
"domino's": "domino", | |
"morning": "morn", | |
"clothes": "cloth", | |
"are": "are", | |
"riding": "ride", | |
"leaves": "leaf", | |
"dangerous": "danger", | |
"clothing": "cloth", | |
"texting": "text", | |
"kiting": "kite", | |
"firefighters": "firefight", | |
"ties": "tie", | |
"married": "married", | |
"teething": "teeth", | |
"gloves": "glove", | |
"tennis": "tennis", | |
"dining": "dine", | |
"directions": "direct", | |
"waves": "wave", | |
"christmas": "christmas", | |
"drives": "drive", | |
"pudding": "pud", | |
"coding": "code", | |
"plating": "plate", | |
"quantas": "quanta", | |
"hornes": "horn", | |
"graves": "grave", | |
"mating": "mate", | |
"paned": "pane", | |
"alertness": "alert", | |
"sunbathing": "sunbath", | |
"tenning": "ten", | |
"wetness": "wet", | |
"urinating": "urine", | |
"sickness": "sick", | |
"braves": "brave", | |
"firefighting": "firefight", | |
"lenses": "lens", | |
"reflections": "reflect", | |
"backpackers": "backpack", | |
"eatting": "eat", | |
"designers": "design", | |
"curiousity": "curious", | |
"playfulness": "play", | |
"blindness": "blind", | |
"hawke": "hawk", | |
"tomatoe": "tomato", | |
"rodeoing": "rodeo", | |
"brightness": "bright", | |
"circuses": "circus", | |
"skateboarders": "skateboard", | |
"staring": "stare", | |
"electronics": "electron", | |
"electicity": "elect", | |
"mountainous": "mountain", | |
"socializing": "social", | |
"hamburgers": "hamburg", | |
"caves": "cave", | |
"transitions": "transit", | |
"wading": "wade", | |
"creame": "cream", | |
"toileting": "toilet", | |
"sautee": "saute", | |
"buildings": "build", | |
"belongings": "belong", | |
"stockings": "stock", | |
"walle": "wall", | |
"cumulis": "cumuli", | |
"travelers": "travel", | |
"conducter": "conduct", | |
"browsing": "brows", | |
"pooping": "poop", | |
"haircutting": "haircut", | |
"toppings": "top", | |
"hearding": "heard", | |
"sunblocker": "sunblock", | |
"bases": "base", | |
"markings": "mark", | |
"mopeds": "mope", | |
"kindergartener": "kindergarten", | |
"pies": "pie", | |
"scrapbooking": "scrapbook", | |
"couponing": "coupon", | |
"meetings": "meet", | |
"elevators": "elev", | |
"lowes": "low", | |
"men's": "men", | |
"childrens": "children", | |
"shelves": "shelve", | |
"paintings": "paint", | |
"raines": "rain", | |
"paring": "pare", | |
"expressions": "express", | |
"routes": "rout", | |
"pease": "peas", | |
"vastness": "vast", | |
"awning": "awn", | |
"boy's": "boy", | |
"drunkenness": "drunken", | |
"teasing": "teas", | |
"conferences": "confer", | |
"ripeness": "ripe", | |
"suspenders": "suspend", | |
"earnings": "earn", | |
"reporters": "report", | |
"kid's": "kid", | |
"containers": "contain", | |
"corgie": "corgi", | |
"porche": "porch", | |
"microwaves": "microwave", | |
"batter's": "batter", | |
"sadness": "sad", | |
"apartments": "apart", | |
"oxygenize": "oxygen", | |
"striping": "stripe", | |
"purring": "pure", | |
"professionals": "profession", | |
"piping": "pipe", | |
"farmer's": "farmer", | |
"potatoe": "potato", | |
"emirates": "emir", | |
"womens": "women", | |
"veteran's": "veteran", | |
"wilderness": "wilder", | |
"propellers": "propel", | |
"alpes": "alp", | |
"charioteering": "chariot", | |
"swining": "swine", | |
"illness": "ill", | |
"crepte": "crept", | |
"adhesives": "adhesive", | |
"regent's": "regent", | |
"decorations": "decor", | |
"rabbies": "rabbi", | |
"overseas": "oversea", | |
"travellers": "travel", | |
"casings": "case", | |
"smugness": "smug", | |
"doves": "dove", | |
"nationals": "nation", | |
"mustange": "mustang", | |
"ringe": "ring", | |
"gondoliere": "gondolier", | |
"vacationing": "vacate", | |
"reminders": "remind", | |
"baldness": "bald", | |
"settings": "set", | |
"glaced": "glace", | |
"coniferous": "conifer", | |
"revelations": "revel", | |
"personals": "person", | |
"daughter's": "daughter", | |
"badness": "bad", | |
"projections": "project", | |
"polarizing": "polar", | |
"vandalizers": "vandal", | |
"minerals": "miner", | |
"protesters": "protest", | |
"controllers": "control", | |
"weddings": "wed", | |
"sometimes": "sometime", | |
"earing": "ear", | |
} | |
class OKVQAStemmer:
    """Stemmer to match OKVQA v1.1 procedure.

    Each token is normalized by the first rule that applies, in this order:

    1. An exact (case-sensitive) entry in ``_MANUAL_MATCHES`` wins.
    2. A token ending in ``"ing"`` is lemmatized as a verb with WordNet.
    3. A token whose POS tag starts with ``NNS`` or ``NNPS`` is singularized
       with ``inflection.singularize``.

    Tokens matching none of the rules are kept unchanged.
    """

    def __init__(self) -> None:
        self._wordnet_lemmatizer = nltk.stem.WordNetLemmatizer()

    def _stem_token(self, word: str, pos: str) -> str:
        """Return the stem of a single token given its POS tag."""
        # Manual overrides take precedence over every automatic rule.
        if word in _MANUAL_MATCHES:
            return _MANUAL_MATCHES[word]
        # The "ing" rule is checked before the plural rule, so an "-ing" token
        # tagged as a plural noun is still lemmatized rather than singularized.
        if word.endswith("ing"):
            return self._wordnet_lemmatizer.lemmatize(word, VERB)
        if pos.startswith(("NNS", "NNPS")):
            return inflection.singularize(word)
        return word

    def stem(self, input_string: str) -> str:
        """Apply stemming.

        Args:
            input_string: Text to tokenize, POS-tag and stem.

        Returns:
            The stemmed tokens joined by single spaces. Because the text is
            re-joined from tokens, the original spacing is not preserved.
        """
        word_and_pos = nltk.pos_tag(nltk.tokenize.word_tokenize(input_string))
        return " ".join(self._stem_token(w, p) for w, p in word_and_pos)
# Shared module-level stemmer instance; it is constructed once, when this
# module is imported.
stemmer = OKVQAStemmer()
def postprocess_ok_vqa_generation(prediction: str) -> str:
    """Stem a generated OK-VQA answer using the module-level ``stemmer``.

    Args:
        prediction: Generated answer text to normalize.

    Returns:
        The result of ``stemmer.stem(prediction)``.
    """
    return stemmer.stem(prediction)