# NOTE(review): the original capture of this file included Hugging Face
# Spaces page chrome (a "Build error" banner, file size, commit hashes,
# and a line-number gutter). That was viewer output, not source; removed
# so the module parses.
# let's import the libraries we need
import io
import netrc
import pickle
import sys

import numpy as np
import pandas as pd
import spacy
import streamlit as st
import torch
from datasets import load_dataset
#from sentence_transformers import SentenceTransformer
from sentence_transformers import CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
tqdm.pandas()
# Load the English STSB dataset
stsb_dataset = load_dataset('stsb_multi_mt', 'en')
stsb_train = pd.DataFrame(stsb_dataset['train'])
stsb_test = pd.DataFrame(stsb_dataset['test'])
# let's create helper functions
nlp = spacy.load("en_core_web_sm")
def text_processing(sentence):
sentence = [token.lemma_.lower()
for token in nlp(sentence)
if token.is_alpha and not token.is_stop]
return sentence
def cos_sim(sentence1_emb, sentence2_emb):
cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
return np.diag(cos_sim)
# let's read the csv file
data = (pd.read_csv("/SBERT_data.csv")).drop(['Unnamed: 0'], axis=1)
prompt = "charles"
data['prompt'] = prompt
data.rename(columns={'target_text': 'sentence2',
'prompt': 'sentence1'}, inplace=True)
data['sentence2'] = data['sentence2'].astype('str')
data['sentence1'] = data['sentence1'].astype('str')
XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
sentence_pairs = []
for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
sentence_pairs.append([sentence1, sentence2])
data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
sentence_pairs, show_progress_bar=True)
# sorting the values
data.sort_values(by=['SBERT CrossEncoder_Score'], ascending=False)
loaded_model = XpathFinder
# Containers
header_container = st.container()
mod_container = st.container()
# Header
with header_container:
# different levels of text you can include in your app
st.title("Xpath Finder App")
# model container
with mod_container:
# collecting input from user
prompt = st.text_input("Enter your description below ...")
# Loading e data
data = (pd.read_csv("/content/SBERT_data.csv")
).drop(['Unnamed: 0'], axis=1)
data['prompt'] = prompt
data.rename(columns={'target_text': 'sentence2',
'prompt': 'sentence1'}, inplace=True)
data['sentence2'] = data['sentence2'].astype('str')
data['sentence1'] = data['sentence1'].astype('str')
# let's pass the input to the loaded_model with torch compiled with cuda
if prompt:
# let's get the result
simscore = loaded_model.predict([prompt])
from sentence_transformers import CrossEncoder
loaded_model = CrossEncoder("cross-encoder/stsb-roberta-base")
sentence_pairs = []
for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
sentence_pairs.append([sentence1, sentence2])
# sorting the df to get highest scoring xpath_container
data['SBERT CrossEncoder_Score'] = loaded_model.predict(sentence_pairs)
most_acc = data.head(5)
# predictions
st.write("Highest Similarity score: ", simscore)
st.text("Is this one of these the Xpath you're looking for?")
st.write(st.write(most_acc["input_text"]))