# let's import the libraries
import en_core_web_sm
import numpy as np
import pandas as pd
import spacy
import streamlit as st
from datasets import load_dataset
from sentence_transformers import CrossEncoder, SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

tqdm.pandas()
# let's load the english stsb dataset
stsb_dataset = load_dataset('stsb_multi_mt', 'en')
stsb_train = pd.DataFrame(stsb_dataset['train'])
stsb_test = pd.DataFrame(stsb_dataset['test'])
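# For reference: each split of stsb_multi_mt has the columns sentence1,
# sentence2 and similarity_score (human-rated gold scores on a 0-5 scale),
# e.g. stsb_train[['sentence1', 'sentence2', 'similarity_score']].head()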
# let's create helper functions
nlp = en_core_web_sm.load()
# nlp = spacy.load("en_core_web_sm")


def text_processing(sentence):
    # keep lowercased lemmas of alphabetic, non-stopword tokens
    sentence = [token.lemma_.lower()
                for token in nlp(sentence)
                if token.is_alpha and not token.is_stop]
    return sentence


def cos_sim(sentence1_emb, sentence2_emb):
    # element-wise cosine similarity between two batches of embeddings
    cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
    return np.diag(cos_sim)
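# A minimal usage sketch (not called by the app): cos_sim above is meant for
# bi-encoder embeddings such as those produced by the imported
# SentenceTransformer. The model name 'all-MiniLM-L6-v2' is an assumed
# example checkpoint, not part of the original app.
def bi_encoder_similarity_example(sentences1, sentences2):
    bi_encoder = SentenceTransformer('all-MiniLM-L6-v2')
    emb1 = bi_encoder.encode(sentences1)
    emb2 = bi_encoder.encode(sentences2)
    # element-wise similarity of sentences1[i] with sentences2[i]
    return cos_sim(emb1, emb2)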
# let's read the csv file
data = pd.read_csv("SBERT_data.csv").drop(['Unnamed: 0'], axis=1)
prompt = "charles"  # placeholder query; overwritten by the user's input below
data['prompt'] = prompt
data.rename(columns={'target_text': 'sentence2',
                     'prompt': 'sentence1'}, inplace=True)
data['sentence2'] = data['sentence2'].astype('str')
data['sentence1'] = data['sentence1'].astype('str')
# let's score every (sentence1, sentence2) pair with the cross-encoder
XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
sentence_pairs = []
for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
    sentence_pairs.append([sentence1, sentence2])
data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
    sentence_pairs, show_progress_bar=True)
loaded_model = XpathFinder
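# For reference: CrossEncoder.predict returns one continuous similarity score
# per [sentence1, sentence2] pair (higher means more similar). A quick sanity
# check, not used by the app below, would be:
# data.sort_values('SBERT CrossEncoder_Score', ascending=False).head()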
# let's create containers
header_container = st.container()
mod_container = st.container()

# let's create the header
with header_container:
    st.title("SBERT CrossEncoder")
    st.markdown("This is a demo of the SBERT CrossEncoder model")

# let's create the model container
with mod_container:
    # let's get input from the user
    prompt = st.text_input("Enter a description below...")
    if prompt:
        # score the user's prompt against every candidate sentence
        simscore = loaded_model.predict(
            [[prompt, sentence2] for sentence2 in data['sentence2']])
        data['SBERT CrossEncoder_Score'] = simscore
        # sort the values and keep the five best matches
        most_acc = data.sort_values(
            'SBERT CrossEncoder_Score', ascending=False).head(5)
        st.write(most_acc)
        st.write("The most accurate sentence is: ",
                 most_acc['sentence2'].iloc[0])
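# To launch the demo locally (assuming this file is saved as app.py):
#   streamlit run app.py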