Spaces:
Runtime error
Runtime error
import streamlit as st | |
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer | |
import tensorflow as tf | |
import numpy as np | |
def convert_label_to_title(label):
    """Return the Vietnamese category title for a class index.

    The lookup table covers the eight class indices 0 through 7. Any label
    that is not one of those keys raises ``KeyError``.
    """
    titles = (
        "SỨC KHỎE",    # health
        "GIÁO DỤC",    # education
        "THỂ THAO",    # sports
        "PHÁP LUẬT",   # law
        "KHOA HỌC",    # science
        "DU LỊCH",     # travel
        "GIẢI TRÍ",    # entertainment
        "KINH DOANH",  # business
    )
    # Look up through a dict rather than indexing the tuple so that negative
    # or out-of-range labels raise KeyError instead of wrapping around.
    return dict(enumerate(titles))[label]
def predict_sentence(model, tokenizer, sentence):
    """Classify ``sentence`` and report the winning category.

    Args:
        model: Callable classifier; it is invoked with the token ids and an
            ``attention_mask`` keyword, and its result must expose
            ``.logits``.
        tokenizer: Callable that encodes ``sentence`` into TensorFlow
            tensors providing ``input_ids`` and ``attention_mask``.
        sentence: Text to classify.

    Returns:
        A ``(title, probabilities, highest_probability)`` tuple: the
        category title of the arg-max class, the softmax of the logits as a
        NumPy array, and the probability of the predicted class taken from
        the first row of that array.
    """
    encoded = tokenizer(
        sentence,
        return_tensors='tf',
        padding=True,
        truncation=True,
    )
    outputs = model(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
    )
    scores = outputs.logits

    # Only the first row is inspected when picking the predicted class.
    best_index = tf.argmax(scores, axis=1).numpy()[0]
    probs = tf.nn.softmax(scores, axis=1).numpy()

    return convert_label_to_title(best_index), probs, probs[0, best_index]
def load_model(checkpoint, num_class):
    """Load a sequence-classification model and its tokenizer.

    Args:
        checkpoint: Identifier passed unchanged to both ``from_pretrained``
            calls.
        num_class: Forwarded to the model loader as ``num_labels``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    classifier = TFAutoModelForSequenceClassification.from_pretrained(
        checkpoint,
        num_labels=num_class,
    )
    return classifier, AutoTokenizer.from_pretrained(checkpoint)
checkpoint = 'distilbert-base-multilingual-cased'


@st.cache_resource
def _load_classifier(checkpoint_name, num_class, weights_path):
    """Build the model and tokenizer and load the fine-tuned weights.

    Streamlit re-executes this script on every widget interaction; caching
    the result keeps the loaded model across reruns instead of rebuilding
    it and re-reading the weights file each time.
    """
    loaded_model, loaded_tokenizer = load_model(checkpoint_name, num_class)
    loaded_model.load_weights(weights_path)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_classifier(checkpoint, 8, 'best_model_weights.h5')

text = st.text_area('Nhập tiêu đề vào đây')
# Skip empty and whitespace-only input so the model is not run on nothing.
if text.strip():
    title, probabilities, highest = predict_sentence(model, tokenizer, text)
    out = {
        'title': title,
        # `highest` is a NumPy scalar, which the standard JSON encoder cannot
        # serialize as a number; convert it to a built-in float first.
        'prob': float(highest),
    }
    st.json(out)