Spaces:
Build error
Build error
# let's import the libraries | |
from sentence_transformers import util | |
from sentence_transformers import CrossEncoder | |
from sentence_transformers import SentenceTransformer | |
import sentence_transformers | |
import time | |
import sys | |
import os | |
import torch | |
import en_core_web_sm | |
from email import header | |
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import pickle | |
import spacy | |
from sklearn.metrics.pairwise import cosine_similarity | |
from datasets import load_dataset | |
import io | |
import netrc | |
from tqdm import tqdm | |
# register tqdm's progress_apply helpers on pandas objects
tqdm.pandas()

# let's load the english stsb dataset and expose both splits as DataFrames
stsb_dataset = load_dataset('stsb_multi_mt', 'en')
stsb_train, stsb_test = (
    pd.DataFrame(stsb_dataset[split_name]) for split_name in ('train', 'test')
)
# let's create helper functions
# spaCy pipeline used by text_processing
# (alternative way to load it: spacy.load("en_core_web_sm"))
nlp = en_core_web_sm.load()


def text_processing(sentence):
    """Return the lowercased lemmas of the useful tokens in *sentence*.

    The sentence is run through the module-level ``nlp`` pipeline; tokens
    that are not purely alphabetic, or that are stop words, are dropped.
    """
    lemmas = []
    for token in nlp(sentence):
        if not token.is_alpha or token.is_stop:
            continue
        lemmas.append(token.lemma_.lower())
    return lemmas
def cos_sim(sentence1_emb, sentence2_emb):
    """Return the row-wise cosine similarity of two embedding matrices.

    Element ``i`` of the result is the cosine similarity between
    ``sentence1_emb[i]`` and ``sentence2_emb[i]``.  Only the matching rows
    are compared, so the cost is linear in the number of rows instead of
    building the full pairwise matrix and discarding everything except
    its diagonal.

    Args:
        sentence1_emb: 2-D array-like of shape (n_samples, n_features).
        sentence2_emb: 2-D array-like of shape (m_samples, n_features).

    Returns:
        1-D ``numpy.ndarray`` of length ``min(n_samples, m_samples)``.

    Raises:
        ValueError: if an input is not 2-D or the feature counts differ.
    """
    first = np.asarray(sentence1_emb, dtype=float)
    second = np.asarray(sentence2_emb, dtype=float)
    if first.ndim != 2 or second.ndim != 2:
        raise ValueError(
            "cos_sim expects 2-D inputs of shape (n_samples, n_features)"
        )
    if first.shape[1] != second.shape[1]:
        raise ValueError(
            "cos_sim inputs must have the same number of features, got "
            f"{first.shape[1]} and {second.shape[1]}"
        )

    # The diagonal of a rectangular pairwise matrix only covers the rows
    # both inputs share, so keep that result length.
    n_pairs = min(first.shape[0], second.shape[0])
    first = first[:n_pairs]
    second = second[:n_pairs]

    first_norm = np.linalg.norm(first, axis=1)
    second_norm = np.linalg.norm(second, axis=1)
    # A zero vector has no direction: score it 0 instead of dividing by 0.
    first_norm[first_norm == 0.0] = 1.0
    second_norm[second_norm == 0.0] = 1.0

    row_dots = np.einsum('ij,ij->i', first, second)
    return row_dots / (first_norm * second_norm)
# let's read the csv file and discard the index column saved with it
data = pd.read_csv("SBERT_data.csv")
data = data.drop(columns=['Unnamed: 0'])

# every row is paired with the same default prompt
prompt = "charles"
data['prompt'] = prompt
data = data.rename(columns={'target_text': 'sentence2',
                            'prompt': 'sentence1'})
for text_column in ('sentence2', 'sentence1'):
    data[text_column] = data[text_column].astype('str')

# score every (sentence1, sentence2) pair with the cross-encoder
XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
sentence_pairs = [
    [first_text, second_text]
    for first_text, second_text in zip(data['sentence1'], data['sentence2'])
]
data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
    sentence_pairs, show_progress_bar=True)

# name under which the UI code refers to the model
loaded_model = XpathFinder
# let's create containers
header_container = st.container()
mod_container = st.container()

# let's create the header
with header_container:
    st.title("SBERT CrossEncoder")
    st.markdown("This is a demo of the SBERT CrossEncoder model")

# let's create the model container
with mod_container:
    # let's get input from the user
    prompt = st.text_input("Enter a description below...")
    if prompt:
        # A cross-encoder scores sentence *pairs*, so the prompt has to be
        # paired with every candidate sentence; passing the prompt alone
        # yields one score that gets broadcast to all rows.
        prompt_pairs = [[prompt, candidate]
                        for candidate in data['sentence2']]
        simscore = loaded_model.predict(prompt_pairs)
        data['SBERT CrossEncoder_Score'] = simscore
        # NOTE: the 'sentence1' column still holds the default prompt the
        # table was built with, not the text typed by the user.

        # sort the values so the best matches come first
        most_acc = data.sort_values(
            'SBERT CrossEncoder_Score', ascending=False).head(5)
        st.write(most_acc)
        st.write("The most accurate sentence is: ",
                 most_acc['sentence2'].iloc[0])