"""FastAPI service exposing Arabic emotion prediction and text summarisation.

Importing this module has side effects: it initialises the Firebase Admin SDK
from a local credentials file and loads two Hugging Face models.
"""

import json
import re
import threading
import time
from collections import Counter
from itertools import groupby

import tensorflow as tf
import numpy as np
import pandas as pd
import swifter
import requests
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import (
    Dense,
    Conv1D,
    Embedding,
    MaxPooling1D,
    GlobalMaxPooling1D,
    GlobalAveragePooling1D,
    SpatialDropout1D,
    LSTM,
    Dropout,
    SimpleRNN,
    Bidirectional,
    Attention,
    Activation,
    GRU,
    TextVectorization,
    Input,
)
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
import arabicstopwords.arabicstopwords as stp
from nltk.stem.isri import ISRIStemmer
from pyarabic.araby import strip_tashkeel, strip_tatweel, tokenize
from huggingface_hub import from_pretrained_keras
from fastapi import FastAPI, Request, HTTPException
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    BertTokenizer,
    pipeline,
)
from arabert.preprocess import ArabertPreprocessor

# Firebase initialization
cred = credentials.Certificate(
    "text-to-emotions-firebase-adminsdk-8isbn-dffbdf01e8.json")
firebase_admin.initialize_app(cred)

# Summarisation model
model_name = "abdalrahmanshahrour/auto-arabic-summarization"
preprocessor = ArabertPreprocessor(model_name="")
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Kept under its own name so the `modelsummary` function defined further down
# no longer rebinds the variable that holds the loaded model.
summary_model = from_pretrained_keras(model_name)
pipeline1 = pipeline(
    "text2text-generation", model=summary_model, tokenizer=tokenizer)

# Emotion model initialization
isristemmer = ISRIStemmer()
model = from_pretrained_keras('MahmoudNasser/GRU-MODEL-EMOTION-AR-TEXT-76jP')

# Emotion labels; the position of each label is the class index it names in
# `original_values` and the model-output column it names in `modelpredict`.
EMOTION_LABELS = (
    'anger', 'sadness', 'joy', 'surprise', 'love', 'sympathy', 'fear')
_EMOTION_BY_INDEX = dict(enumerate(EMOTION_LABELS))

# Every character listed here is replaced by a single space.  The first
# literal is a raw string so that the backslash it contains is not an invalid
# escape sequence; its runtime value is unchanged.
_PUNCTUATION_CHARS = (
    r'''()-[]{};:'"\,<>./@#$%^&*،؛_~'''
    + '''`÷×؛_ـ،/:".,'~¦+|”…“–ـ=﴾﴿ ﹱ ﹹ ⸀˓• ב'''
)
_PUNCTUATION_TABLE = {ord(char): ' ' for char in _PUNCTUATION_CHARS}

# Ordered (pattern, replacement) steps applied by `Remove_unwanted`.
_UNWANTED_STEPS = (
    # Links
    (re.compile(r'^https?:\/\/.*[\r\n]*', flags=re.MULTILINE), ' '),
    (re.compile(r'^http?:\/\/.*[\r\n]*', flags=re.MULTILINE), ' '),
    (re.compile(r"http\S+"), " "),
    (re.compile(r"https\S+"), " "),
    # Whitespace runs, Latin letters, leading/trailing whitespace
    (re.compile(r'\s+'), ' '),
    (re.compile(r'[a-zA-Z]+'), ' '),
    (re.compile(r"^\s+|\s+$"), ""),
    # Digits
    (re.compile(r"(\s\d+)"), " "),
    (re.compile(r"$\d+\W+|\b\d+\b|\W+\d+$"), " "),
    (re.compile(r"\d+"), " "),
    # Alef variants are folded to bare alef
    (re.compile(r'[إأٱآا]'), 'ا'),
    # NOTE(review): the next two steps insert the literal texts "[ي]" and
    # "[ؤئ]" (brackets included), which looks like swapped pattern and
    # replacement arguments.  Left exactly as-is because the deployed model
    # may have been trained on text cleaned this way - confirm against the
    # training pipeline before changing.
    (re.compile(r'ى'), '[ي]'),
    (re.compile(r'ء'), '[ؤئ]'),
    (re.compile(r' +'), ' '),
)


def stemming(txt):
    """Return `txt` passed through the ISRI stemmer.

    NOTE(review): the whole string is handed to the stemmer in one call rather
    than word by word; kept unchanged so the model input stays the same.
    """
    return isristemmer.stem(txt)


def remove_singleCharacter(text):
    """Return `text` re-joined with single spaces, dropping 1-character tokens.

    Tokenisation uses `pyarabic.araby.tokenize`; the previous code called it
    through an alias (`ar`) that was never imported and raised NameError.
    """
    return ' '.join(word for word in tokenize(text) if len(word) != 1)


def remove_punctuations(text):
    """Replace every Latin and Arabic punctuation character with a space."""
    return text.translate(_PUNCTUATION_TABLE)


def normalize_text(txt):
    """Strip tashkeel and tatweel, then collapse runs of a repeated character."""
    txt = strip_tashkeel(txt)
    txt = strip_tatweel(txt)
    # Keep one character per run of identical consecutive characters.
    return ''.join(char for char, _ in groupby(txt))


def remove_stopwords(txt, path="stopword.txt"):
    """Return `txt` without the words listed in the stop-word file.

    Args:
        txt: Text whose words are separated by single spaces.
        path: UTF-8 file containing one stop word per line.

    Returns:
        The remaining words joined by single spaces.
    """
    # The context manager closes the file; it used to be left open.
    with open(path, 'r', encoding='utf-8') as stop_words_file:
        stop_words = set(stop_words_file.read().split('\n'))
    return ' '.join(word for word in txt.split(' ') if word not in stop_words)


def Remove_unwanted(text):
    """Remove links, Latin letters, digits and extra spacing from `text`.

    Also applies the character substitutions listed in `_UNWANTED_STEPS`,
    in that order.
    """
    for pattern, replacement in _UNWANTED_STEPS:
        text = pattern.sub(replacement, text)
    return text


def txt_preprocess(text):
    """Run the cleaning steps applied to text before emotion prediction."""
    text = normalize_text(text)
    text = stemming(text)
    text = remove_stopwords(text)
    text = remove_punctuations(text)
    text = Remove_unwanted(text)
    return text


def see_if_thereupdates():
    """Return the first line of ``updates.txt`` (trailing newline included)."""
    with open("updates.txt", "r") as updates_file:
        return updates_file.readline()


def getmodel():
    """Load and return the 'GRU-MODEL-EMOTION-AR-TEXT-72P' Keras model."""
    return from_pretrained_keras('MahmoudNasser/GRU-MODEL-EMOTION-AR-TEXT-72P')


def original_values(num):
    """Return the emotion label for class index `num`, or None if unknown."""
    return _EMOTION_BY_INDEX.get(num)


def modelsummary(data):
    """Summarise `data` and return ``{'summary': <text>}``.

    Punctuation is stripped from the generated summary.  The previous code
    passed an undefined name (`text`) to the pipeline, so every call raised
    NameError.
    """
    result = pipeline1(
        data,
        pad_token_id=tokenizer.eos_token_id,
        num_beams=4,
        repetition_penalty=3.0,
        max_length=600,
        length_penalty=1.0,
        no_repeat_ngram_size=3,
    )[0]['generated_text']
    return {'summary': remove_punctuations(result)}


def modelpredict(data):
    """Return a mapping of emotion label to the model's score for `data`."""
    cleaned = txt_preprocess(data)
    scores = model.predict(pd.Series([cleaned]))[0]
    # Index explicitly so a model with fewer outputs than labels still fails
    # loudly instead of returning a truncated mapping.
    return {
        label: float(scores[index])
        for index, label in enumerate(EMOTION_LABELS)
    }


async def _read_json_object(request: Request) -> dict:
    """Return the request body as a dict, or raise HTTP 400 if it is not one."""
    try:
        payload = await request.json()
    except ValueError as err:  # json.JSONDecodeError / UnicodeDecodeError
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON") from err
    if not isinstance(payload, dict):
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object")
    return payload


# Main server initialization
app = FastAPI()


@app.get("/")
def index():
    """Simple liveness endpoint."""
    return "Hello World"


@app.post("/summary")
async def read_summary(request: Request):
    """Summarise the ``text`` field of the JSON body; 400 if it is missing."""
    json_data = await _read_json_object(request)
    if 'text' in json_data:
        return modelsummary(json_data['text'])
    raise HTTPException(status_code=400, detail="Missing text value")


@app.post("/predict")
async def read_root(request: Request):
    """Score the ``text`` field of the JSON body for each emotion.

    The body must also carry ``"mathod": "emotion_predict"``; that key
    spelling is part of the existing API contract and is kept as-is.
    """
    json_data = await _read_json_object(request)
    if json_data.get("mathod") == "emotion_predict" and 'text' in json_data:
        return modelpredict(json_data["text"])
    raise HTTPException(status_code=400, detail="Missing mathod value")


@app.get("/commonwords")
def getcommonwords():
    """Return a fixed table of common words and their counts."""
    return {'التسجيل': 23, 'مش': 19, 'تطبيق': 18, 'التطبيق': 18, 'التفعيل': 17, 'كود': 13, 'ارسال': 12, 'تسجيل': 12, 'يتم': 12, 'الرقم': 12}