"""Gradio demo for offensive-language detection with several fine-tuned models."""

import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Load the saved models and tokenizers (paths are relative to the working
# directory the script is started from).
model_roberta = AutoModelForSequenceClassification.from_pretrained("./models/roberta-base-offensive")
tokenizer_roberta = AutoTokenizer.from_pretrained("./models/roberta-base-offensive")

model_distilbert = AutoModelForSequenceClassification.from_pretrained("./models/distilbert-base-uncased-offensive")
tokenizer_distilbert = AutoTokenizer.from_pretrained("./models/distilbert-base-uncased-offensive")

model_deberta = AutoModelForSequenceClassification.from_pretrained("./models/deberta-offensive")
tokenizer_deberta = AutoTokenizer.from_pretrained("./models/deberta-offensive")

model_bert = AutoModelForSequenceClassification.from_pretrained("./models/bert-offensive")
tokenizer_bert = AutoTokenizer.from_pretrained("./models/bert-offensive")

# Arabic saved models and tokenizers
model_arbert = AutoModelForSequenceClassification.from_pretrained("./models/UBC-NLP/ARBERT")
tokenizer_arbert = AutoTokenizer.from_pretrained("./models/UBC-NLP/ARBERT")

model_marbert = AutoModelForSequenceClassification.from_pretrained("./models/UBC-NLP/MARBERT")
tokenizer_marbert = AutoTokenizer.from_pretrained("./models/UBC-NLP/MARBERT")

# Single source of truth mapping the UI model name to its (model, tokenizer)
# pair. Insertion order is the order shown in the dropdown.
_MODEL_REGISTRY = {
    "RoBERTa": (model_roberta, tokenizer_roberta),
    "DistilBERT": (model_distilbert, tokenizer_distilbert),
    "ARBERT": (model_arbert, tokenizer_arbert),
    "MARBERT": (model_marbert, tokenizer_marbert),
    "DeBERTa": (model_deberta, tokenizer_deberta),
    "BERT": (model_bert, tokenizer_bert),
}

# Class index -> human-readable label shown in the UI.
_PREDICTION_LABELS = {0: "Not Offensive", 1: "Offensive"}


def predict(tweet, model_choice):
    """Classify *tweet* with the model named by *model_choice*.

    Args:
        tweet: The text to classify.
        model_choice: One of the keys of ``_MODEL_REGISTRY`` (the dropdown
            value); any other value, including ``None``, is treated as
            "no model selected".

    Returns:
        A ``(prediction, details)`` tuple of strings. On success,
        ``prediction`` is ``"Not Offensive"`` or ``"Offensive"`` and
        ``details`` is ``"Confidence: <p>"`` with the softmax probability of
        the predicted class. If no valid model is selected or the text is
        empty, both strings carry a message for the user instead.
    """
    selected = _MODEL_REGISTRY.get(model_choice)
    if selected is None:
        return "Model not selected", "Please select a model."

    # Classifying an empty string would still yield a label, which is
    # meaningless to the user; ask for input instead.
    if not tweet or not tweet.strip():
        return "No text provided", "Please enter some text."

    model, tokenizer = selected
    encoded_input = tokenizer.encode(
        tweet,
        return_tensors='pt',
        truncation=True,
        max_length=512,
        padding=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(encoded_input)

    probabilities = torch.softmax(output.logits, dim=-1)
    # A single text is encoded, so the batch holds one row; take the argmax
    # along the class axis rather than over the flattened tensor.
    prediction_index = probabilities.argmax(dim=-1).item()
    prediction = _PREDICTION_LABELS[prediction_index]
    confidence = probabilities[0, prediction_index].item()

    return prediction, f"Confidence: {confidence:.4f}"


def app_interface():
    """Build and return the Gradio Blocks UI for the detector."""
    with gr.Blocks() as app:
        gr.Markdown("## Offensive Language Detection")
        gr.Markdown("### Instructions:")
        gr.Markdown(
            "1. Select the language of the text.\n"
            "2. Choose a model corresponding to the selected language:\n"
            " - For **English**: BERT, DeBERTa, RoBERTa, or DistilBERT\n"
            " - For **Tunisian Arabic**: ARBERT or MARBERT"
        )

        with gr.Row():
            # NOTE(review): the language selection is displayed but is not
            # passed to `predict`; only the text and model choice are used.
            language = gr.Radio(["English", "Tunisian Arabic"], label="Choose Language")

        with gr.Row():
            model_choice = gr.Dropdown(list(_MODEL_REGISTRY), label="Choose Model")

        with gr.Row():
            tweet = gr.Textbox(lines=4, placeholder="Enter your text here...", label="Text")

        submit_btn = gr.Button("Predict")

        with gr.Row():
            prediction = gr.Textbox(label="Prediction")
            confidence = gr.Textbox(label="Confidence")

        submit_btn.click(fn=predict, inputs=[tweet, model_choice], outputs=[prediction, confidence])

    return app


# Build the UI and start the server when the module is executed.
app = app_interface()
app.launch()