# ArabicT5 Model for Arabic News Classification and Generation
- This model focuses on classifying Arabic news and generating Arabic news articles.
# The number in the generated text represents the category of the news, as shown below:
category_mapping = {  # category name -> number printed after "category:" in the generated text
    "Political": 1,
    "Economy": 2,
    "Health": 3,
    "Sport": 4,
    "Culture": 5,
    "Technology": 6,
    "Art": 7,
    "Accidents": 8,
}
# Training parameters
| Parameter | Value |
| --- | --- |
| Training batch size | 8 |
| Evaluation batch size | 8 |
| Learning rate | 1e-4 |
| Max input length | 64 |
| Max target length | 200 |
| Number of workers | 4 |
| Epochs | 5 |
# Results
| Metric | Value |
| --- | --- |
| Training loss | 3.20 |
| Classification accuracy | 95.7% |
| Generation accuracy | 88.87% |
# Example usage
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline
model_name = "Hezam/ArabicT5-49GB-small-classification-generation"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)
generation_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
)

text = "أوقفوا القتل الجماعي في غزة"  # short prompt; the model expands it into "category: <n> article: <text>"
output = generation_pipeline(
    text,
    num_beams=10,
    max_length=200,  # same value as the max target length listed in the training parameters
    top_p=0.9,  # NOTE(review): sampling-only option; likely ignored unless do_sample=True - confirm
    repetition_penalty=3.0,
    no_repeat_ngram_size=3,
)[0]["generated_text"]  # the pipeline returns a list of dicts; take the first result's text
print(output)  # print explicitly: a bare `output` expression only displays in a notebook/REPL
category: 1 article: كتب عبد اللطيف صبح قال الرءيس الفلسطيني محمود عباس في تصريح ل اليوم السابع وقفوا القتل الجماعي في مدينه غزة مءكدا يجب يوقفوا قتل المدنيين العزل في قطاعي غزة والضفه وغزه واوقفوا القتل الجماع
bash
category: 1 article: كتب عبد اللطيف صبح قال الرءيس الفلسطيني محمود عباس في تصريح ل اليوم السابع وقفوا القتل الجماعي في مدينه غزة مءكدا يجب يوقفوا قتل المدنيين العزل في قطاعي غزة والضفه وغزه واوقفوا القتل الجماع
- Downloads last month
- 2
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.