import gradio as gr
from transformers import AutoModelForSeq2SeqLM
from transformers import AlbertTokenizer
# Load the CreoleM2M tokenizer and model once at import time so every request
# served by the UI reuses the same objects.
tokenizer = AlbertTokenizer.from_pretrained(
    "prajdabre/CreoleM2M", do_lower_case=False, use_fast=False, keep_accents=True)
# .eval() switches the model to inference mode (it is only used for generation).
model = AutoModelForSeq2SeqLM.from_pretrained("prajdabre/CreoleM2M").eval()

# Ids of the special tokens handed to model.generate().
# NOTE(review): the token literals were empty strings, which made all three
# lookups return the same id. They are restored here to the conventional
# "<s>" / "</s>" / "<pad>" names used with this tokenizer family -- confirm
# against the model's vocabulary.
bos_id = tokenizer._convert_token_to_id_with_added_voc("<s>")
eos_id = tokenizer._convert_token_to_id_with_added_voc("</s>")
pad_id = tokenizer._convert_token_to_id_with_added_voc("<pad>")
# Display name -> language code used to build the "<2xxx>" language tags.
# Insertion order is significant: it is the order shown in the dropdowns.
CREOLE = {
    "Hawaiian Pidgin": "hwc",
    "Saint Lucian Creole": "acf",
    "Belizean Creole": "bzj",
    "Chavacano Creole": "cbk",
    "Seychellois Creole": "crs",
    "Sranan Tongo": "srn",
    "Aukan": "djk",
    "Gullah": "gul",
    "San Andrés–Providencia Creole": "icr",
    "Jamaican Creole": "jam",
    "Mauritian Creole": "mfe",
    "Papiamento": "pap",
    "Pijin": "pis",
    "Tok Pisin": "tpi",
    "Torres Strait Creole": "tcs",
    "Australian Kriol": "rop",
    "Sango": "sag",
    "Saramaccan": "srm",
    "Bislama": "bis",
    "Nigerian Pidgin": "pcm",
    "Sierra Leonean Creole": "kri",
    "Haitian Creole": "hat",
    "Kupang Malay": "mkn",
    "Tetun Dili": "tdt",
    "Malay Baba": "mbf",
    "Kituba": "ktu",
    "English": "eng",
}
def _translate(text, src_code, tgt_code):
    """Run one greedy decoding pass that translates ``text``.

    Args:
        text: Source sentence, without any language tag.
        src_code: Language code of ``text`` (a value of ``CREOLE``); appended
            to the input as the ``<2xxx>`` tag.
        tgt_code: Language code to generate; its ``<2xxx>`` tag is used as the
            decoder start token.

    Returns:
        The decoded output string with special tokens removed.
    """
    input_ids = tokenizer(
        text + " <2" + src_code + ">",
        add_special_tokens=False,
        return_tensors="pt",
        padding=True,
    ).input_ids
    output_ids = model.generate(
        input_ids,
        use_cache=True,
        num_beams=1,
        # Cap the output at twice the tokenized input length.
        max_length=int(2 * len(input_ids[0])),
        min_length=1,
        early_stopping=True,
        pad_token_id=pad_id,
        bos_token_id=bos_id,
        eos_token_id=eos_id,
        decoder_start_token_id=tokenizer._convert_token_to_id_with_added_voc(
            "<2" + tgt_code + ">"),
    )
    return tokenizer.decode(
        output_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)


def generate(input, slang, tlang):
    """Translate ``input`` from the language ``slang`` into ``tlang``.

    Pairs that involve English (or where source and target are the same) are
    translated in a single pass. Any other pair is pivoted through English:
    source -> English, then English -> target.

    Args:
        input: Text to translate. (The name shadows the builtin but is kept so
            existing keyword callers continue to work.)
        slang: Source language display name; must be a key of ``CREOLE``.
        tlang: Target language display name; must be a key of ``CREOLE``.

    Returns:
        The translated text, or ``""`` when ``input`` is empty or whitespace.

    Raises:
        KeyError: If ``slang`` or ``tlang`` is not a key of ``CREOLE``.
    """
    slang = CREOLE[slang]
    tlang = CREOLE[tlang]

    text = (input or "").strip()
    if not text:
        # Nothing to translate; avoid decoding from a bare language tag.
        return ""

    if slang == tlang or "eng" in (slang, tlang):
        return _translate(text, slang, tlang)

    # Creole -> Creole: go through English as the pivot language.
    english = _translate(text, slang, "eng")
    return _translate(english, "eng", tlang)
# Widgets for the demo UI. Dropdown choices are the CREOLE display names,
# in the dictionary's insertion order.
languages = list(CREOLE)

src_language_drop_down = gr.inputs.Dropdown(
    languages,
    label="Select source language",
    default="Hawaiian Pidgin",
    type="value",
)
tgt_language_drop_down = gr.inputs.Dropdown(
    languages,
    label="Select target language",
    default="English",
    type="value",
)
text = gr.inputs.Textbox(
    label="Enter text in the source language",
    placeholder="Enter text here...",
    default="",
    lines=1,
)
# NOTE: the variable name `text_ouptut` (sic) is referenced where the
# Interface is built, so the spelling is kept as-is.
text_ouptut = gr.outputs.Textbox(
    label="View translation in the target language",
    type="text",
)

# Comma-separated list of supported language names for the UI description.
supported_lang = ", ".join(languages)
# Clickable examples shown in the UI. Each row is
# [source text, source language name, target language name], i.e. the same
# order as the Interface inputs. There is one row per Creole in CREOLE
# (translated into English) plus a final English -> Hawaiian Pidgin row.
# The language names must match keys of CREOLE exactly.
examples = [
['Mé lè sé sòlda-a mawéy pou yo té bat li , Pòl di ofisyé-a ki doubout la-a , “ Ès lwa-a di ou sa bat on jan Ronm si ou pòkò menm fè lodyans pou sa ? ”', "Saint Lucian Creole", "English"],
['Be taem oli fasemgud hem blong wipim hem , Pol i talem long kapten blong olgeta , we i stap stanap long ples ya se , “ ! E ! Mi mi sitisen blong Rom ya . Yufala i no jajem mi yet . ! Olsem wanem yufala i wantem wipim mi ? ”', "Bislama", "English"],
['Wail di soalja dehn mi-di tai op Paal fi beet ahn , Paal aks wan a di aafisa dehn weh mi-di stan op kloas tu ahn , “ Tel mi , ih leegal fi beet wahn Roaman sitizn bifoa unu chrai ahn da koat ? ”', "Belizean Creole", "English"],
['Mientras ta amarra sila con Pablo para latiga , ya habla le con el capitan quien talla parao , “ Tiene ba uste el derecho para latiga con un ciudadano Romano que nuay pa pasa investigacion de algun crimen ? ”', "Chavacano Creole", "English"],
['Kan zot tin anmar li pour li ganny fwete , Pol ti dir avek sa zofisye ki ti la , “ Eski ou annan drwa fwet en sitwayen Romen ki pan ganny zize ? ”', "Seychellois Creole", "English"],
['Den tei en poti fu leli buba . Ma a piki a ofisii di mu meke den du dati taki : “ U tei mi enke foluku fu Loma Foto fu wipi ondoosuku ! Ma kownu anda weiti taki : Na lanti fu kuutu mu ondoosuku foluku fu Loma Foto . A sowtu wipi ya a ganda mindii noiti mu pasa . ’ ”', "Aukan", "English"],
['Bot wen dey tie Paul op an scretch um out fa beat um , Paul taak ta de offisa wa beena stanop dey . Paul aks um say , “ De law ain tell oona dat oona kin beat a Roman citizen wen nobody ain eben jedge um , needa find out dat e done sompin bad , ainty dough ? ”', "Gullah", "English"],
['Wen dey wen stretch him out fo whip him real hard , Paul wen tell da captain dat stay dea , “ Dis okay in da rules fo da Rome peopo ? fo you fo whip one guy dat get da same rights jalike da Rome peopo ? even one guy dat neva do notting wrong ? ”', "Hawaiian Pidgin", "English"],
['Wail di suoldya dehn wende tai op Paul fi biit im , Paul aks wan a di aafisa weh wende stan op gens im , “ Tel mi , sah , ih liigal fi biit wan Roman sitizn bifuor unu trai im dah kuot ? ”', "San Andrés–Providencia Creole", "English"],
['Afta dem tai im op an chrech im out fi biit im , Paal aks di ed fi onjrid suoja we did tan op de , “ Di Laa gi yu no rait fi biit mi , wan man we kom fram Ruom , wen yu no iivn kyari mi go a kuot an se mi gilti fi notn ? ”', "Jamaican Creole", "English"],
['Bɔt wɛn dɛn want bit am , dɛn tay am ; na in Pɔl aks di soja man dɛn edman we bin tinap de se , “ Di lɔ tɛl una se una kin bit pɔsin we na Roman wɛn una nɔ jɔj am yet ? ”', "Sierra Leonean Creole", "English"],
['Me letan zot fini atas li pou kapav fwet li , Pol dir ofisie ki ti la , “ Zot ena drwa fwet enn sitwayin Romin san mem ki zot ziz li ? ”', "Mauritian Creole", "English"],
['Ma ora nan a rèk su kurpa pa suté ku zuip , Pablo a bisa e ofisial di ehérsito pará einan : “ Boso tin mag di suta un hende ku ta siudadano romano sin ku e ta kondená ? ”', "Papiamento", "English"],
['Wen dem don put am for groun mak dem start to flog am , Pol kon ask di soja wey stand near am , " E dey rite mak una flog pesin wey bi Roman citizin , wen dem neva joj en kase ? "', "Nigerian Pidgin", "English"],
['Bat taem olketa taengem hand bilong hem long post for whipim hem , Paul sei olsem long bigman bilong army wea standap long there : “ Hao , hem stret for iufala whipim wanfala man bilong Rome wea iufala no kotem hem yet ? ”', "Pijin", "English"],
['en wen deibin taiyimap Pol blanga beldim im , imbin tok langa det boswan solja hubin jandap deya wansaid langa im . Imbin tok , “ Yumob nomo lau beldim mi , dumaji mi garram det rait seimwei laik ol yumob Roman pipul , en ai nomo bin abum kotkeis yet . ”', "Australian Kriol", "English"],
['Me tongana ala leke lo ti tene a pika lo na zaza , Paul atene na turugu ti kota kamba so ayeke luti na ndo so : “ Ndia amû lege na ALA ti pika na zaza mbeni koli so ayeke Romain na so a dë ngbanga na li ti lo pëpe ? ”', "Sango", "English"],
['Hën de tjëën go seeka tai fu de hupi . Hën Paulosu hakisi di kabiteni u sodati taanputaanpu dë taa : “ Unfa di wëti dë ? Un sa hupi wan goon mii u Loomë ufö un kuutu soni fëën ö ? ”', "Saramaccan", "English"],
['Ma di den poti Paulus didon langalanga fu wipi en , dan a taigi a legre-ofsiri di ben e tanapu drape : „ A fiti taki unu e wipi wan Romesma sondro fu krutu en ? ”', "Sranan Tongo", "English"],
['Bat wen dempla i bin mekpas Pol so dempla ken ploke em , Pol i bin spik po da sekan amiopisa uda bin stanap klostu wea em . Pol i bin spik , ‘ Ei yu ! Yu lau po ploke man uda gad rait wase man prom Rom , bipo yupla teke em po kot a ? ’', "Torres Strait Creole", "English"],
['Tasol taim ol i apim 2-pela han bilong en na pasim bilong wipim em , Pol i tokim ofisa bilong ami i sanap klostu olsem : “ I stret yupela i wipim wanpela man Rom taim em i no bin sanap yet long kot ? ”', "Tok Pisin", "English"],
['Men , lè yo fin mare Pòl pou yo bat li , li di ofisye ki te la a : “ Èske NOU gen dwa bat yon sitwayen women ki pa kondane ? ”', "Haitian Creole", "English"],
['Waktu dong ika sang Paulus ko mau firuk sang dia , ju dia bale tanya sang itu tantara , bilang , “ Iko pamarenta Roma pung atoran , mana yang batúl ? Kalo satu orang ada pung hak warga Roma , ais dia dapa parkara , bosong musti bekin karmana sang dia ? Bosong papoko lebe dolo sang dia , ko , bosong pareksa lebe dolo ? ”', "Kupang Malay", "English"],
['Maibé kuandu sira kesi tiha nia atu baku nia ho xikote , Paulo dehan ba kapitaun tropa nian neʼebé hamriik besik : “ Tuir lei , imi bele baku ema Roma ida maski seidauk tesi lia ba nia ka lae ? ”', "Tetun Dili", "English"],
['Bila dia-orang sudah ikatkan dia dngan tali kulit , Paulus kata sama itu hulubalang yang berdiri dkat situ , " Kalau satu anak Rom blum kna hukum , ada-kah patut angkau ssahkan dia ? "', "Malay Baba", "English"],
['Bu bau imene kukanga yandi sambu na kubula yandi fimbe , Paulu tubaka kwa nkuluntu ya telemaka kuna : Nsiku pesa nzila kubula muntu ya Loma fimbu ya imene kufunduswa ve ?', "Kituba", "English"],
['But when they had stretched him out for the whipping , Paul said to the army officer standing there : “ Is it lawful for you to scourge a Roman who has not been condemned ? ”', "English", "Hawaiian Pidgin"]
]
# Wire the translation function to the widgets and start the web app.
# The inputs order must match generate(input, slang, tlang) and the column
# order of each row in `examples`.
iface = gr.Interface(
    fn=generate,
    inputs=[text, src_language_drop_down, tgt_language_drop_down],
    outputs=text_ouptut,
    title="CreoleM2M System",
    description="A system to translate to, from and between Creoles (and English). Currently the model supports " + supported_lang,
    examples=examples,
)
iface.launch(enable_queue=True)