# File size: 6,301 Bytes
# 51d3578
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Target-language display name -> Hugging Face model id handed to
# MarianTokenizer / MarianMTModel ``from_pretrained``. Every id shares the
# "Helsinki-NLP/opus-mt-en-" prefix, so only the language-code suffix is
# spelled out per entry. Insertion order is the order shown in the UI.
# NOTE(review): the ids are not validated against the Hub here — confirm that
# each checkpoint actually exists.
models = {
    name: f"Helsinki-NLP/opus-mt-en-{code}"
    for name, code in (
        ("Afrikaans", "af"),
        ("Amharic", "am"),
        ("Arabic", "ar"),
        ("Asturian", "ast"),
        ("Azerbaijani", "az"),
        ("Bashkir", "ba"),
        ("Belarusian", "be"),
        ("Bulgarian", "bg"),
        ("Bengali", "bn"),
        ("Breton", "br"),
        ("Bosnian", "bs"),
        ("Catalan", "ca"),
        ("Cebuano", "ceb"),
        ("Czech", "cs"),
        ("Welsh", "cy"),
        ("Danish", "da"),
        ("German", "de"),
        ("Greek", "el"),
        ("English", "en"),
        ("Spanish", "es"),
        ("Estonian", "et"),
        ("Persian", "fa"),
        ("Fulah", "ff"),
        ("Finnish", "fi"),
        ("French", "fr"),
        ("Western Frisian", "fy"),
        ("Irish", "ga"),
        ("Scottish Gaelic", "gd"),
        ("Galician", "gl"),
        ("Gujarati", "gu"),
        ("Hausa", "ha"),
        ("Hebrew", "he"),
        ("Hindi", "hi"),
        ("Croatian", "hr"),
        ("Haitian Creole", "ht"),
        ("Hungarian", "hu"),
        ("Armenian", "hy"),
        ("Indonesian", "id"),
        ("Igbo", "ig"),
        ("Iloko", "ilo"),
        ("Icelandic", "is"),
        ("Italian", "it"),
        ("Japanese", "ja"),
        ("Javanese", "jv"),
        ("Georgian", "ka"),
        ("Kazakh", "kk"),
        ("Central Khmer", "km"),
        ("Kannada", "kn"),
        ("Korean", "ko"),
        ("Luxembourgish", "lb"),
        ("Ganda", "lg"),
        ("Lingala", "ln"),
        ("Lao", "lo"),
        ("Lithuanian", "lt"),
        ("Latvian", "lv"),
        ("Malagasy", "mg"),
        ("Macedonian", "mk"),
        ("Malayalam", "ml"),
        ("Mongolian", "mn"),
        ("Marathi", "mr"),
        ("Malay", "ms"),
        ("Burmese", "my"),
        ("Nepali", "ne"),
        ("Dutch", "nl"),
        ("Norwegian", "no"),
        ("Northern Sotho", "ns"),
        ("Occitan", "oc"),
        ("Oriya", "or"),
        ("Panjabi", "pa"),
        ("Polish", "pl"),
        ("Pushto", "ps"),
        ("Portuguese", "pt"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
        ("Sindhi", "sd"),
        ("Sinhala", "si"),
        ("Slovak", "sk"),
        ("Slovenian", "sl"),
        ("Somali", "so"),
        ("Albanian", "sq"),
        ("Serbian", "sr"),
        ("Swati", "ss"),
        ("Sundanese", "su"),
        ("Swedish", "sv"),
        ("Swahili", "sw"),
        ("Tamil", "ta"),
        ("Thai", "th"),
        ("Tagalog", "tl"),
        ("Tswana", "tn"),
        ("Turkish", "tr"),
        ("Ukrainian", "uk"),
        ("Urdu", "ur"),
        ("Uzbek", "uz"),
        ("Vietnamese", "vi"),
        ("Wolof", "wo"),
        ("Xhosa", "xh"),
        ("Yiddish", "yi"),
        ("Yoruba", "yo"),
        ("Chinese", "zh"),
        ("Zulu", "zu"),
    )
}

def load_model(language):
    """Load the model and tokenizer for the specified target language.

    Args:
        language: Display name of the target language; looked up as a key
            in the module-level ``models`` mapping.

    Returns:
        A ``(model, tokenizer)`` tuple on success, or ``(None, None)`` when
        the language has no entry in ``models`` or its checkpoint cannot be
        loaded. In both failure cases the problem is reported to the user
        through ``st.error``.
    """
    model_name = models.get(language)
    if not model_name:
        st.error(f"Model for {language} not found.")
        return None, None

    try:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
    except OSError as exc:
        # from_pretrained raises OSError when the id does not resolve to a
        # checkpoint or the files cannot be fetched; surface that in the UI
        # instead of crashing the page with a traceback.
        st.error(f"Could not load model '{model_name}' for {language}: {exc}")
        return None, None

    return model, tokenizer

def translate_text(text, model, tokenizer):
    """Translate text using the provided model and tokenizer.

    Args:
        text: Source text to translate.
        model: Object exposing ``generate(input_ids)``; in this file, the
            ``MarianMTModel`` returned by ``load_model``.
        tokenizer: Object exposing ``encode`` and ``decode``; in this file,
            the matching ``MarianTokenizer``.

    Returns:
        The decoded translation of the first generated sequence, or an
        empty string when ``text`` is empty or whitespace-only.

    Note:
        Encoding is done with ``truncation=True``, so input longer than the
        tokenizer's configured maximum length is cut off rather than passed
        through to ``generate`` at full length.
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # truncation=True keeps over-long input within the tokenizer's limit.
    input_ids = tokenizer.encode(text, return_tensors="pt", truncation=True)
    generated = model.generate(input_ids)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

def main():
    """Render the translator page and react to the Translate button.

    Draws the title, author credit, target-language selector and English
    input box. When the button is pressed with non-empty text, loads the
    model for the chosen language via ``load_model`` and writes the result
    of ``translate_text``; with empty text, shows a warning instead.
    """
    st.title("🌐 Multilingual Translator")
    st.markdown("Created by: [**Engr. Hamesh Raj**](https://www.linkedin.com/in/datascientisthameshraj/)")

    # Input widgets: language picker first, then the source text box.
    language_choices = list(models.keys())
    target_language = st.selectbox("Select target language:", language_choices)
    source_text = st.text_area("Enter text in English:")

    # Nothing further to do until the user presses the button.
    if not st.button("Translate"):
        return

    if not source_text:
        st.warning("Please enter text to translate.")
        return

    # Load the model matching the selected target language.
    model, tokenizer = load_model(target_language)
    if not (model and tokenizer):
        # load_model has already reported the failure to the user.
        return

    result = translate_text(source_text, model, tokenizer)
    st.write(f"**Translation in {target_language}:**")
    st.write(result)

# Script entry point: build the page only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()