ksvmuralidhar's picture
Create app.py
8fb0cad
import streamlit as st
import pandas as pd
import numpy as np
from unidecode import unidecode
import tensorflow as tf
import cloudpickle
from transformers import DistilBertTokenizerFast
import os
def load_model():
    """Load the TFLite classifier, its label encoder and the tokenizer.

    Both model artefacts are read from the ``models`` directory, resolved
    relative to the current working directory.

    Returns:
        A ``(interpreter, label_encoder, tokenizer)`` tuple: the
        ``tf.lite.Interpreter`` built from ``lang_detect_hf_distilbert.tflite``,
        the object unpickled from ``lang_detect_labelencoder.bin``, and the
        ``DistilBertTokenizerFast`` for ``distilbert-base-multilingual-cased``.
    """
    model_dir = "models"
    interpreter = tf.lite.Interpreter(
        model_path=os.path.join(model_dir, "lang_detect_hf_distilbert.tflite")
    )

    # SECURITY: unpickling can execute arbitrary code. Only ship encoder files
    # produced by a trusted training run alongside this app.
    encoder_path = os.path.join(model_dir, "lang_detect_labelencoder.bin")
    with open(encoder_path, "rb") as encoder_file:
        label_encoder = cloudpickle.load(encoder_file)

    tokenizer = DistilBertTokenizerFast.from_pretrained(
        "distilbert-base-multilingual-cased"
    )
    return interpreter, label_encoder, tokenizer
# Load the model artefacts when the script runs; inference() reads these
# module-level names.
interpreter, label_encoder, tokenizer = load_model()
def inference(text):
    """Predict the language of *text* with the TFLite classifier.

    Args:
        text: Raw user text. ``None``, empty and whitespace-only input is
            never sent to the model.

    Returns:
        ``"<LABEL> (<score>)"``, where the label is the upper-cased class name
        returned by ``label_encoder`` and the score is the winning output
        value rounded to 3 decimals; or ``"Can't Predict"`` when there is no
        text to classify.
    """
    # Blank input would otherwise be tokenized to special tokens only and
    # still yield a (meaningless) label.
    if text is None or not text.strip():
        return "Can't Predict"

    # Fixed 50-token window: shorter text is padded, longer text is truncated.
    encoded = tokenizer(
        text,
        max_length=50,
        padding="max_length",
        truncation=True,
        return_tensors="tf",
    )

    # tflite model inference
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_detail = interpreter.get_output_details()[0]

    # NOTE(review): assumes the interpreter lists the attention mask as input 0
    # and the input ids as input 1 -- confirm against the converted model.
    interpreter.set_tensor(input_details[0]["index"], encoded["attention_mask"])
    interpreter.set_tensor(input_details[1]["index"], encoded["input_ids"])
    interpreter.invoke()

    # First (only) row of the output batch: one value per known language.
    scores = interpreter.get_tensor(output_detail["index"])[0]
    best = np.argmax(scores)
    label = label_encoder.inverse_transform([best])[0].upper()
    return f"{label} ({str(np.round(scores[best], 3))})"
def main():
    """Render the language-detection page and show a prediction on submit."""
    st.title("Language Detection")

    # Language codes shown to the user, displayed in upper case.
    supported = 'eng, rus, ita, tur, epo, ber, deu, kab, fra, por, spa, hun, jpn, heb, ukr, nld, fin, pol, mkd, lit, cmn, mar, ces, dan'.upper()
    st.write(f'Model is trained on the following languages \n{supported}')

    user_text = st.text_area("Enter Text:", "", height=200)

    # Nothing to display until the button is pressed.
    if not st.button("Submit"):
        return

    st.write(inference(user_text))
# Build the page only when this file is executed as a script.
if __name__ == "__main__":
    main()