stogaja committed on
Commit 985d3f5
1 Parent(s): f86de2b

Update app.py

Files changed (1)
  1. app.py +63 -36
app.py CHANGED
@@ -1,5 +1,8 @@
- from importlib.machinery import PathFinder
-
  import io
  import netrc
  import pickle
@@ -7,33 +10,54 @@ import sys
  import pandas as pd
  import numpy as np
  import streamlit as st
- # let's import sentence transformer
  import sentence_transformers
  import torch
- #######################################
-
- st.markdown(
-     f"""
-     <style>
-     .reportview-container .main .block-container{{
-         max-width: 90%;
-         padding-top: 5rem;
-         padding-right: 5rem;
-         padding-left: 5rem;
-         padding-bottom: 5rem;
-     }}
-     img{{
-         max-width:40%;
-         margin-bottom:40px;
-     }}
-     </style>
-     """,
-     unsafe_allow_html=True,
- )
-
- # # let's load the saved model
- loaded_model = pickle.load(open('https://drive.google.com/file/d/1ipi5f0B0i5nxUTsjK7J_mNmirp2RyK9c/view?usp=sharing', 'rb'))

  # Containers
  header_container = st.container()
@@ -48,32 +72,35 @@ with header_container:

  # model container
  with mod_container:
      # collecting input from user
      prompt = st.text_input("Enter your description below ...")

      # Loading the data
-     data = (pd.read_csv("SBERT_data.csv")).drop(['Unnamed: 0'], axis = 1)

-     data['prompt']= prompt
-     data.rename(columns = {'target_text':'sentence2', 'prompt':'sentence1'}, inplace = True)
      data['sentence2'] = data['sentence2'].astype('str')
-     data['sentence1'] = data['sentence1'].astype('str')

      # let's pass the input to the loaded_model with torch compiled with cuda
      if prompt:
          # let's get the result
-         simscore = loaded_model.predict([prompt])
-
          from sentence_transformers import CrossEncoder
          loaded_model = CrossEncoder("cross-encoder/stsb-roberta-base")
          sentence_pairs = []
-         for sentence1, sentence2 in zip(data['sentence1'],data['sentence2']):
-             sentence_pairs.append([sentence1, sentence2])
-
          # sorting the df to get highest scoring xpath_container
          data['SBERT CrossEncoder_Score'] = loaded_model.predict(sentence_pairs)
          most_acc = data.head(5)
          # predictions
          st.write("Highest Similarity score: ", simscore)
          st.text("Is this one of these the Xpath you're looking for?")
-         st.write(st.write(most_acc["input_text"]))
 
+ # let's import the libraries we need
+ from sentence_transformers import CrossEncoder
+ import spacy
+ from sklearn.metrics.pairwise import cosine_similarity
+ from datasets import load_dataset
  import io
  import netrc
  import pickle
  import pandas as pd
  import numpy as np
  import streamlit as st
  import sentence_transformers
  import torch
+ from tqdm import tqdm
+ tqdm.pandas()
+
+ # Load the English STSB dataset
+ stsb_dataset = load_dataset('stsb_multi_mt', 'en')
+ stsb_train = pd.DataFrame(stsb_dataset['train'])
+ stsb_test = pd.DataFrame(stsb_dataset['test'])
+
+ # let's create helper functions
+ nlp = spacy.load("en_core_web_sm")
+
+
+ def text_processing(sentence):
+     sentence = [token.lemma_.lower()
+                 for token in nlp(sentence)
+                 if token.is_alpha and not token.is_stop]
+     return sentence
+
+
+ def cos_sim(sentence1_emb, sentence2_emb):
+     cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
+     return np.diag(cos_sim)
+

+ # let's read the csv file
+ data = (pd.read_csv("/SBERT_data.csv")).drop(['Unnamed: 0'], axis=1)
+
+ prompt = "charles"
+ data['prompt'] = prompt
+ data.rename(columns={'target_text': 'sentence2',
+                      'prompt': 'sentence1'}, inplace=True)
+ data['sentence2'] = data['sentence2'].astype('str')
+ data['sentence1'] = data['sentence1'].astype('str')
+
+ XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
+ sentence_pairs = []
+ for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
+     sentence_pairs.append([sentence1, sentence2])
+
+ data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
+     sentence_pairs, show_progress_bar=True)
+
+ # sorting the values
+ data.sort_values(by=['SBERT CrossEncoder_Score'], ascending=False)
+
+ loaded_model = XpathFinder

  # Containers
  header_container = st.container()
 
  # model container
  with mod_container:
+
      # collecting input from user
      prompt = st.text_input("Enter your description below ...")

      # Loading the data
+     data = (pd.read_csv("/content/SBERT_data.csv")
+             ).drop(['Unnamed: 0'], axis=1)

+     data['prompt'] = prompt
+     data.rename(columns={'target_text': 'sentence2',
+                          'prompt': 'sentence1'}, inplace=True)
      data['sentence2'] = data['sentence2'].astype('str')
+     data['sentence1'] = data['sentence1'].astype('str')

      # let's pass the input to the loaded_model with torch compiled with cuda
      if prompt:
          # let's get the result
+         simscore = XpathFinder.predict([prompt])
+
          from sentence_transformers import CrossEncoder
          loaded_model = CrossEncoder("cross-encoder/stsb-roberta-base")
          sentence_pairs = []
+         for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
+             sentence_pairs.append([sentence1, sentence2])
+
          # sorting the df to get highest scoring xpath_container
          data['SBERT CrossEncoder_Score'] = loaded_model.predict(sentence_pairs)
          most_acc = data.head(5)
          # predictions
          st.write("Highest Similarity score: ", simscore)
          st.text("Is this one of these the Xpath you're looking for?")
+         st.write(most_acc["input_text"])
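
For reference, the sketch below shows the cross-encoder ranking flow the new app.py is built around, reduced to a single helper. It is illustrative only: the rank_xpaths name, the local "SBERT_data.csv" path, and the assumption that the CSV holds 'target_text' and 'input_text' columns come from the diff above, not from any library API.

# Minimal sketch of the cross-encoder ranking used above (not part of the commit).
import pandas as pd
from sentence_transformers import CrossEncoder


def rank_xpaths(prompt, csv_path="SBERT_data.csv", top_k=5):
    # Load the candidate descriptions / XPaths (columns assumed from the diff).
    data = pd.read_csv(csv_path)
    data['target_text'] = data['target_text'].astype('str')

    # Score every (prompt, candidate) pair with the STSB cross-encoder.
    model = CrossEncoder("cross-encoder/stsb-roberta-base")
    pairs = [[prompt, candidate] for candidate in data['target_text']]
    data['SBERT CrossEncoder_Score'] = model.predict(pairs)

    # Sort by score so the top_k rows really are the best matches.
    return data.sort_values('SBERT CrossEncoder_Score', ascending=False).head(top_k)


# Example usage with an illustrative prompt:
# best = rank_xpaths("button that submits the login form")
# print(best[['input_text', 'SBERT CrossEncoder_Score']])

Note that this sketch sorts by the cross-encoder score before taking the top five rows, whereas the committed mod_container block calls data.head(5) on the unsorted frame.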