import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
src_lang = "ru"
tgt_lang = "zu"  # Kabardian (kbd) is not a native M2M100 language; this fine-tune reuses the "zu" code for the target

# tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)


def translate(text, num_beams=4, num_return_sequences=4):
    inputs = tokenizer(text, return_tensors="pt")

    # generate() requires num_return_sequences <= num_beams
    num_return_sequences = min(num_return_sequences, num_beams)

    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )

    translations = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)

    # result = {"input": text, "translations": translations}
    return text, translations


output = gr.Textbox()

# with gr.Accordion("Advanced Options"):
num_beams = gr.Slider(2, 10, step=1, value=4, label="Number of beams")
num_return_sequences = gr.Slider(2, 10, step=1, value=4, label="Number of returned sentences")

title = "Russian-Circassian translator demo"
article = "Want to help? Join the Discord server"

examples = [
    ["Мы идем домой"],
    ["Сегодня хорошая погода"],
    ["Дети играют во дворе"],
    ["We live in a big house"],
    ["Tu es une bonne personne."],
    ["أين تعيش؟"],
    ["Bir şeyler yapmak istiyorum."],
    ["– Если я его отпущу, то ты вовек не сможешь его поймать, – заявил Сосруко."],
    ["Как только старик ушел, Сатаней пошла к Саусырыко."],
    ["我永远不会放弃你。"],
    ["우리는 소치에 살고 있습니다."],
]

gr.Interface(
    fn=translate,
    inputs=["text", num_beams, num_return_sequences],
    outputs=["text", output],
    title=title,
    # examples=examples,
    article=article,
).launch()

# import gradio as gr
# title = "Русско-черкесский переводчик"
# description = "Demo of a Russian-Circassian (Kabardian dialect) translator.