xpathfinder / app.py
stogaja's picture
Update app.py
6b2f9a9
# let's import the libraries
from sentence_transformers import util
from sentence_transformers import CrossEncoder
from sentence_transformers import SentenceTransformer
import sentence_transformers
import time
import sys
import os
import torch
import en_core_web_sm
from email import header
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import spacy
from sklearn.metrics.pairwise import cosine_similarity
from datasets import load_dataset
import io
import netrc
from tqdm import tqdm
tqdm.pandas()
# let's load the english stsb dataset
stsb_dataset = load_dataset('stsb_multi_mt', 'en')
stsb_train = pd.DataFrame(stsb_dataset['train'])
stsb_test = pd.DataFrame(stsb_dataset['test'])
# let's create helper functions
nlp = en_core_web_sm.load()
#nlp = spacy.load("en_core_web_sm")
def text_processing(sentence):
sentence = [token.lemma_.lower()
for token in nlp(sentence)
if token.is_alpha and not token.is_stop]
return sentence
def cos_sim(sentence1_emb, sentence2_emb):
cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
return np.diag(cos_sim)
# let's read the csv file
data = (pd.read_csv("SBERT_data.csv")).drop(['Unnamed: 0'], axis=1)
prompt = "charles"
data['prompt'] = prompt
data.rename(columns={'target_text': 'sentence2',
'prompt': 'sentence1'}, inplace=True)
data['sentence2'] = data['sentence2'].astype('str')
data['sentence1'] = data['sentence1'].astype('str')
# loop through the data
XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
sentence_pairs = []
for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
sentence_pairs.append([sentence1, sentence2])
data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
sentence_pairs, show_progress_bar=True)
loaded_model = XpathFinder
# let's create containers
header_container = st.container()
mod_container = st.container()
# let's create the header
with header_container:
st.title("SBERT CrossEncoder")
st.markdown("This is a demo of the SBERT CrossEncoder model")
# let's create the model container
with mod_container:
# let's get input from the user
prompt = st.text_input("Enter a description below...")
if prompt:
simscore = loaded_model.predict([prompt])
# sort the values
data['SBERT CrossEncoder_Score'] = simscore
most_acc = data.head(5)
st.write(most_acc)
st.write("The most accurate sentence is: ",
most_acc['sentence2'].iloc[0])