stogaja committed
Commit 050cd2b
1 Parent(s): be2864a

Update app.py

Files changed (1)
  1. app.py +33 -57
app.py CHANGED
@@ -1,21 +1,26 @@
- # let's import the libraries we need
- #from sentence_transformers import SentenceTransformer
- #from sentence_transformers import CrossEncoder
  import spacy
  from sklearn.metrics.pairwise import cosine_similarity
  from datasets import load_dataset
  import io
  import netrc
- import pickle
- import sys
- import pandas as pd
- import numpy as np
- import streamlit as st
- import torch
  from tqdm import tqdm
  tqdm.pandas()

- # Load the English STSB dataset
  stsb_dataset = load_dataset('stsb_multi_mt', 'en')
  stsb_train = pd.DataFrame(stsb_dataset['train'])
  stsb_test = pd.DataFrame(stsb_dataset['test'])
@@ -23,19 +28,16 @@ stsb_test = pd.DataFrame(stsb_dataset['test'])
  # let's create helper functions
  nlp = spacy.load("en_core_web_sm")

-
  def text_processing(sentence):
      sentence = [token.lemma_.lower()
                  for token in nlp(sentence)
                  if token.is_alpha and not token.is_stop]
      return sentence

-
  def cos_sim(sentence1_emb, sentence2_emb):
      cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
      return np.diag(cos_sim)

-
  # let's read the csv file
  data = (pd.read_csv("SBERT_data.csv")).drop(['Unnamed: 0'], axis=1)

@@ -46,61 +48,35 @@ data.rename(columns={'target_text': 'sentence2',
  data['sentence2'] = data['sentence2'].astype('str')
  data['sentence1'] = data['sentence1'].astype('str')

  XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
  sentence_pairs = []
  for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
-     sentence_pairs.append([sentence1, sentence2])

  data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
-     sentence_pairs, show_progress_bar=True)
-
- # sorting the values
- data.sort_values(by=['SBERT CrossEncoder_Score'], ascending=False)

  loaded_model = XpathFinder

- # Containers
  header_container = st.container()
  mod_container = st.container()

- # Header
  with header_container:

-     # different levels of text you can include in your app
-     st.title("Xpath Finder App")
-
-
- # model container
  with mod_container:
-
-     # collecting input from user
-     prompt = st.text_input("Enter your description below ...")
-
-     # Loading e data
-     data = (pd.read_csv("/content/SBERT_data.csv")
-             ).drop(['Unnamed: 0'], axis=1)
-
-     data['prompt'] = prompt
-     data.rename(columns={'target_text': 'sentence2',
-                          'prompt': 'sentence1'}, inplace=True)
-     data['sentence2'] = data['sentence2'].astype('str')
-     data['sentence1'] = data['sentence1'].astype('str')
-
-     # let's pass the input to the loaded_model with torch compiled with cuda
-     if prompt:
-         # let's get the result
-         simscore = loaded_model.predict([prompt])
-
-         from sentence_transformers import CrossEncoder
-         loaded_model = CrossEncoder("cross-encoder/stsb-roberta-base")
-         sentence_pairs = []
-         for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
-             sentence_pairs.append([sentence1, sentence2])
-
-         # sorting the df to get highest scoring xpath_container
-         data['SBERT CrossEncoder_Score'] = loaded_model.predict(sentence_pairs)
-         most_acc = data.head(5)
-         # predictions
-         st.write("Highest Similarity score: ", simscore)
-         st.text("Is this one of these the Xpath you're looking for?")
-         st.write(st.write(most_acc["input_text"]))
+ # let's import the libraries
+ from email import header
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import pickle
  import spacy
  from sklearn.metrics.pairwise import cosine_similarity
  from datasets import load_dataset
  import io
  import netrc
  from tqdm import tqdm
  tqdm.pandas()
+ import torch
+ import os
+ import sys
+ import time
+ import sentence_transformers
+ from sentence_transformers import SentenceTransformer
+ from sentence_transformers import CrossEncoder
+ from sentence_transformers import util

+ # let's load the english stsb dataset
  stsb_dataset = load_dataset('stsb_multi_mt', 'en')
  stsb_train = pd.DataFrame(stsb_dataset['train'])
  stsb_test = pd.DataFrame(stsb_dataset['test'])

  # let's create helper functions
  nlp = spacy.load("en_core_web_sm")

  def text_processing(sentence):
      sentence = [token.lemma_.lower()
                  for token in nlp(sentence)
                  if token.is_alpha and not token.is_stop]
      return sentence

  def cos_sim(sentence1_emb, sentence2_emb):
      cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
      return np.diag(cos_sim)

  # let's read the csv file
  data = (pd.read_csv("SBERT_data.csv")).drop(['Unnamed: 0'], axis=1)

  data['sentence2'] = data['sentence2'].astype('str')
  data['sentence1'] = data['sentence1'].astype('str')

+ # loop through the data
  XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
  sentence_pairs = []
  for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
+     sentence_pairs.append([sentence1, sentence2])

  data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
+     sentence_pairs, show_progress_bar=True)

  loaded_model = XpathFinder

+ # let's create containers
  header_container = st.container()
  mod_container = st.container()

+ # let's create the header
  with header_container:
+     st.title("SBERT CrossEncoder")
+     st.markdown("This is a demo of the SBERT CrossEncoder model")

+ # let's create the model container
  with mod_container:
+     # let's get input from the user
+     prompt = st.text_input("Enter a description below...")
+
+     if prompt:
+         simscore = loaded_model.predict([prompt])
+         # sort the values
+         data['SBERT CrossEncoder_Score'] = simscore
+         most_acc = data.head(5)
+         st.write(most_acc)
+         st.write("The most accurate sentence is: ", most_acc['sentence2'].iloc[0])