# FastAPI service: Arabic emotion prediction (GRU model) and abstractive
# text summarization, with Firebase Admin integration.
import json
import re
import threading
import time
from collections import Counter

import numpy as np
import pandas as pd
import requests
import swifter
import tensorflow as tf
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
from fastapi import FastAPI, Request, HTTPException
from huggingface_hub import from_pretrained_keras
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# from keras.optimizers.optimizer_v2.rmsprop import RMSProp
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv1D, Embedding, MaxPooling1D, GlobalMaxPooling1D, GlobalAveragePooling1D, SpatialDropout1D, LSTM, Dropout, SimpleRNN, Bidirectional, Attention, Activation, GRU, TextVectorization, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
import arabicstopwords.arabicstopwords as stp
from nltk.stem.isri import ISRIStemmer
from pyarabic.araby import strip_tashkeel, strip_tatweel
from transformers import BertTokenizer, AutoModelForSeq2SeqLM, pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
from arabert.preprocess import ArabertPreprocessor
# Firebase initialization: load the service-account key and start the
# Admin SDK app (must run exactly once, before any Firestore calls).
cred = credentials.Certificate(
    "text-to-emotions-firebase-adminsdk-8isbn-dffbdf01e8.json")
firebase_admin.initialize_app(cred)
# Summarization model setup: Arabic abstractive summarizer from the HF Hub.
model_name = "abdalrahmanshahrour/auto-arabic-summarization"
# NOTE(review): preprocessor is built with an empty model_name — this was
# probably meant to be ArabertPreprocessor(model_name=model_name); confirm
# before relying on `preprocessor` (it is unused in this file).
preprocessor = ArabertPreprocessor(model_name="")
tokenizer = AutoTokenizer.from_pretrained(model_name)
modelsummary = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# text2text pipeline used by the summarization endpoint; it keeps its own
# reference to the model object, so later reuse of the name `modelsummary`
# does not break it.
pipeline1 = pipeline("text2text-generation", model=modelsummary, tokenizer=tokenizer)
# Emotion-model initialization: ISRI stemmer for preprocessing and the
# pretrained GRU emotion classifier from the HF Hub.
isristemmer = ISRIStemmer()
model = from_pretrained_keras('MahmoudNasser/GRU-MODEL-EMOTION-AR-TEXT-76jP')
def stemming(txt):
    """Return the ISRI root/stem of *txt* via the shared stemmer instance."""
    return isristemmer.stem(txt)
def remove_singleCharacter(text):
    """Drop every 1-character token from *text*; return the rest space-joined.

    Bug fix: the original called ``ar.tokenize(text)`` but no ``ar`` module is
    imported anywhere in this file, so every call raised ``NameError``.
    Whitespace splitting reproduces the intended tokenization.
    """
    return ' '.join(word for word in text.split() if len(word) != 1)
# remove_punctuations
def remove_punctuations(text):
    """Replace every Latin/Arabic punctuation character in *text* with a space."""
    punc = '''()-[]{};:'"\,<>./@#$%^&*،؛_~'''
    arabic_punctuations = '''`÷×؛_ـ،/:".,'~¦+|”…“–ـ=﴾﴿ ﹱ ﹹ ⸀˓• ב'''
    # str.translate performs one C-level pass instead of ~60 chained
    # .replace() calls; duplicate characters across the two lists collapse
    # harmlessly in the translation table.
    table = str.maketrans({ch: ' ' for ch in punc + arabic_punctuations})
    return text.translate(table)
def normalize_text(txt):
    """Strip Arabic diacritics (tashkeel) and tatweel, then collapse each run
    of identical consecutive characters down to a single occurrence."""
    txt = strip_tashkeel(txt)
    txt = strip_tatweel(txt)
    deduped = []
    for ch in txt:
        # keep a character only when it differs from the previous one
        if not deduped or deduped[-1] != ch:
            deduped.append(ch)
    return ''.join(deduped)
def remove_stopwords(txt, path="stopword.txt"):
    """Remove Arabic stopwords from *txt*.

    Words are compared against the newline-separated list in *path*.
    Fixes a resource leak: the original opened the stopword file without ever
    closing it; a ``with`` block now guarantees closure.  The word list is
    loaded into a set for O(1) membership tests instead of O(n) list scans.
    """
    with open(path, 'r', encoding='utf-8') as f:
        stop_words = set(f.read().split('\n'))
    return ' '.join(w for w in txt.split(' ') if w not in stop_words)
def Remove_unwanted(text):
    """Clean raw Arabic text: remove URLs, Latin letters and digits, collapse
    whitespace, and normalize common letter variants.

    Bug fix: the last two normalization rules had pattern and replacement
    swapped (``re.sub(r'ى', '[ي]', ...)`` inserted the literal string "[ي]"
    instead of normalizing the letter).  They now map ى→ي and ؤ/ئ→ء as
    intended.  The four overlapping URL patterns are merged into two, and the
    whitespace collapse/trim happens once at the end so the result never
    carries leading/trailing spaces.
    """
    # links (anchored line-start form, then any inline occurrence)
    text = re.sub(r'^https?:\/\/.*[\r\n]*', ' ', text, flags=re.MULTILINE)
    text = re.sub(r"https?\S+", " ", text)
    # Latin letters, then digits in their several positions
    text = re.sub(r'[a-zA-Z]+', ' ', text)
    text = re.sub(r"(\s\d+)", " ", text)
    text = re.sub(r"$\d+\W+|\b\d+\b|\W+\d+$", " ", text)
    text = re.sub(r"\d+", " ", text)
    # letter normalization: alef variants → ا, ى → ي, ؤ/ئ → ء
    text = re.sub(r'[إأٱآا]', 'ا', text)
    text = re.sub(r'ى', 'ي', text)
    text = re.sub(r'[ؤئ]', 'ء', text)
    # collapse runs of whitespace and trim the edges
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r"^\s+|\s+$", "", text)
    return text
def txt_preprocess(text):
    """Run the full cleaning pipeline applied before emotion prediction:
    normalization, stemming, stopword removal, punctuation removal, and
    general cleanup — in that order."""
    pipeline_steps = (normalize_text, stemming, remove_stopwords,
                      remove_punctuations, Remove_unwanted)
    for step in pipeline_steps:
        text = step(text)
    return text
def see_if_thereupdates():
    """Return the first line of ``updates.txt`` (trailing newline included).

    Fixes a file-handle leak: the original opened the file without closing it.
    """
    with open("updates.txt", "r") as f:
        return f.readline()
def getmodel():
    """Fetch a fresh copy of the GRU emotion model from the Hugging Face Hub.

    NOTE(review): this loads the '...72P' checkpoint while the module-level
    `model` uses '...76jP' — confirm which one is intended.
    """
    return from_pretrained_keras('MahmoudNasser/GRU-MODEL-EMOTION-AR-TEXT-72P')
def original_values(num):
    """Map a class index (0-6) to its emotion label; return None otherwise."""
    labels = {
        0: 'anger',
        1: 'sadness',
        2: 'joy',
        3: 'surprise',
        4: 'love',
        5: 'sympathy',
        6: 'fear',
    }
    # dict.get mirrors the original if/elif chain's implicit None fallthrough
    return labels.get(num)
def modelsummary(data):
    """Summarize Arabic text *data*; return ``{'summary': cleaned_summary}``.

    Bug fix: the body referenced an undefined name ``text`` instead of the
    ``data`` parameter, so every call raised ``NameError``.
    NOTE(review): this function shadows the module-level ``modelsummary``
    model object defined earlier; the pipeline keeps its own reference so
    inference still works, but the name collision should be resolved.
    """
    result = pipeline1(data,
                       pad_token_id=tokenizer.eos_token_id,
                       num_beams=4,
                       repetition_penalty=3.0,
                       max_length=600,
                       length_penalty=1.0,
                       no_repeat_ngram_size=3)[0]['generated_text']
    result = remove_punctuations(result)
    return {'summary': result}
def modelpredict(data):
    """Preprocess *data* and return a dict of emotion label → probability."""
    cleaned = txt_preprocess(data)
    scores = model.predict(pd.Series([cleaned]))[0]
    # label order matches the model's output indices 0..6
    labels = ('anger', 'sadness', 'joy', 'surprise', 'love', 'sympathy', 'fear')
    return {label: float(score) for label, score in zip(labels, scores)}
# Main server initialization
app = FastAPI()

# NOTE(review): the handlers in this file were defined without route
# decorators (apparently lost in a copy/paste), so FastAPI never registered
# them; the conventional root GET is restored here.
@app.get("/")
def index():
    """Liveness check."""
    return "Hello World"
@app.post("/summary")  # NOTE(review): decorator lost in paste — confirm path
async def read_root(request: Request):
    """Summarization endpoint: expects JSON ``{'text': ...}``.

    Returns the summary dict, or 400 when the 'text' key is absent.
    NOTE(review): a second handler further down reuses the name ``read_root``;
    with decorators restored both routes register, but the functions should
    be given distinct names.
    """
    json_data = await request.json()
    if 'text' in json_data:
        return modelsummary(json_data['text'])
    else:
        raise HTTPException(status_code=400, detail="Missing text value")
@app.post("/")  # NOTE(review): decorator lost in paste — confirm path
async def read_root(request: Request):
    """Emotion-prediction endpoint: expects JSON
    ``{'mathod': 'emotion_predict', 'text': ...}``.

    'mathod' [sic] is the key clients actually send — do not fix the
    spelling without coordinating a client-side change.  Returns the
    per-emotion probability dict, or 400 on a malformed request.
    """
    json_data = await request.json()
    if "mathod" in json_data and json_data["mathod"] == "emotion_predict" and 'text' in json_data:
        return modelpredict(json_data["text"])
    else:
        raise HTTPException(status_code=400, detail="Missing mathod value")
def getcommonwords():
    """Return the precomputed top-10 most frequent words (word → count)."""
    common = {
        'التسجيل': 23,
        'مش': 19,
        'تطبيق': 18,
        'التطبيق': 18,
        'التفعيل': 17,
        'كود': 13,
        'ارسال': 12,
        'تسجيل': 12,
        'يتم': 12,
        'الرقم': 12,
    }
    return common