Update app.py
app.py
CHANGED
@@ -2,7 +2,6 @@
import streamlit as st
import requests
import numpy as np
-from streamlit_lottie import st_lottie
from PIL import Image
import warnings
warnings.filterwarnings("ignore")
@@ -43,467 +42,12 @@ options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome('chromedriver',options=options)

-
-
-
-job1 = job_type.split(" ")[0]
-job2 = job_type.split(" ")[1]
-link1 = 'https://wuzzuf.net/search/jobs/?a=navbl&q='+job1+'%20'+job1
-title = []
-location = []
-country = []
-job_description = []
-Job_Requirements =[]
-company_name = []
-links = []
-Jop_type = []
-Career_Level = []
-company_logo = []
-Job_Categories = []
-Skills_And_Tools = []
-Experience_Needed =[]
-post_time = []
-Title = []
-pages_num = np.ceil(job_num/15)
-
-
-for i in range(int(pages_num) ):
-link_new = link1 +'&start='+str(i)
-data = requests.get(link_new)
-soup = BeautifulSoup(data.content)
-Title = soup.find_all('h2' , {'class': 'css-m604qf'})
-
-# to get the info about jobs
-
-for x in range(0,len(Title)):
-t = re.split('\(|\-',Title[x].find('a').text)
-title.append(t[0].strip())
-loc = re.split(',' , soup.find_all('span' , {'class': 'css-5wys0k'})[x].text)
-r = ""
-for i in range(len(loc[:-1])):
-r= r+ ', ' +loc[:-1][i].strip()
-location.append(r.replace(',', '', 1).strip())
-country.append(loc[-1].strip())
-links.append('https://wuzzuf.net' + Title[x].find('a').attrs['href'])
-m = " ".join(re.findall("[a-zA-Z\d+]+", (soup.find_all('div' , {'class': 'css-d7j1kk'})[x].find('a').text)))
-company_name.append(m)
-c = soup.find_all('div' ,{'class':'css-1lh32fc'})[x].find_all('span')
-if len(c) ==1:
-Jop_type.append(c[0].text)
-else:
-n =[]
-for i in range(len(c)):
-n.append(c[i].text)
-Jop_type.append(n)
-n =soup.find_all('div' ,{'class':'css-y4udm8'})[x].find_all('div')[1].find_all(['a','span'])
-Career_Level.append(n[0].text)
-n =soup.find_all('div' ,{'class':'css-y4udm8'})[x].find_all('div')[1].find_all(['a','span'])
-
-yy = n[1].text.replace('·',' ').strip()
-yy = re.findall('[0-9-+]*',yy)
-y1 =""
-for i in range(len(yy)):
-
-if any(yy[i]):
-y1 = y1+yy[i]
-if y1 != "":
-Experience_Needed.append(y1)
-else:
-Experience_Needed.append("Not Specified")
-time = (soup.find_all('div' ,{'class':'css-d7j1kk'}))[x].find('div')
-post_time.append(time.text)
-
-# to get the logo of the company
-
-data1 = requests.get(links[x])
-soup1 = BeautifulSoup(data1.content)
-company_logo.append(soup1.find_all('meta',{'property':"og:image"})[0]['content'])
-#time.sleep(4)
-
-
-# get Job_Categories , Skills_And_Tools , job_description , and job_requirements from urls
-driver = webdriver.Chrome('chromedriver',options=options)
-#driver.implicitly_wait(10)
-driver.get(links[x])
-Job_Categories.append(driver.find_element(By.XPATH ,'//*[@id="app"]/div/main/section[2]/div[5]').text.split("\n")[1:])
-Skills_And_Tools.append(driver.find_element(By.XPATH ,'//*[@id="app"]/div/main/section[2]/div[6]').text.split("\n")[1:])
-job_description.append(driver.find_element(By.XPATH ,'//*[@id="app"]/div/main/section[3]').text.split("\n")[1:])
-all =driver.find_elements(By.XPATH ,'//*[@id="app"]/div/main/section[4]/div')
-dict_other = {}
-
-new = all[0].text.split("\n\n")
-
-if len(new)!=1 :
-for i in range(len(new)):
-result =[]
-for k in (new[i].split('\n')[1:]):
-result.append(k.replace("\u202f"," "))
-dict_other[new[i].split('\n')[0]] = result
-
-#result = re.sub('[\W_]+', '', ini_string)
-
-Job_Requirements.append(dict_other)
-
-else:
-nn = new[0].replace("\u202f"," ")
-Job_Requirements.append(nn.split('\n'))
-
-
-# create data frame to combine all together
-
-df = pd.DataFrame({'Title' : title , 'Location' : location ,'country':country,'URLs':links ,'Company_Name' : company_name,'Career_Level':Career_Level,'post_time':post_time,'Experience_Needed':Experience_Needed,'Company_Logo':company_logo,"Job_Categories":Job_Categories , "Skills_And_Tools":Skills_And_Tools , "job_description":job_description,"Job_Requirements":Job_Requirements})
-
-df[:job_num].to_excel('WUZZUF_scrapping.xlsx',index=False,encoding='utf-8')
-return df[:job_num]
-
-
-# linkedin function
-
-
-def LINKEDIN_Scrapping(job_search , num_jobs):
-job1 = job_search.split(" ")[0]
-job2 = job_search.split(" ")[1]
-
-link1 = 'https://www.linkedin.com/jobs/search?keywords='+job1 +'%20' +job2 +'&location=&geoId=&trk=public_jobs_jobs-search-bar_search-submit&position=1&pageNum=0'
-
-# FIRST get main informations about jobs
-
-title = []
-location = []
-country = []
-company_name = []
-post_time = []
-links =[]
-# get the specific numbers of jobs
-l1 = ""
-ll =""
-driver = webdriver.Chrome('chromedriver',options=options)
-driver.get(link1)
-SCROLL_PAUSE_TIME = 0.5
-while True :
-l1 = driver.find_elements(By.XPATH,'//*[@id="main-content"]/section[2]/ul/li[*]/div')
-ll= driver.find_elements(By.XPATH ,'//*[@id="main-content"]/section[2]/ul/li[*]/div/a')
-
-if len(l1) >= num_jobs:
-break
-time.sleep(3)
-# Get scroll height
-last_height = driver.execute_script("return document.body.scrollHeight")
-while True:
-
-# Scroll down to bottom
-driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
-
-# Wait to load page
-time.sleep(SCROLL_PAUSE_TIME)
-
-# Calculate new scroll height and compare with last scroll height
-new_height = driver.execute_script("return document.body.scrollHeight")
-if new_height == last_height:
-break
-last_height = new_height
-
-options.add_argument("window-size=1200x600")
-WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="main-content"]/section[2]/button'))).click()
-print(len(l1))
-time.sleep(2)
-
-
-
-l2 = l1[:num_jobs]
-
-for info in l2:
-info_tot = info.text.split("\n")
-if len(info_tot)==5:
-title.append(info_tot[1])
-location.append(info_tot[3])
-company_name.append(info_tot[2])
-post_time.append(info_tot[4])
-else:
-title.append(info_tot[1])
-location.append(info_tot[3])
-company_name.append(info_tot[2])
-post_time.append(info_tot[5])
-
-# get links for jobs
-l3 = ll[:num_jobs]
-for i in l3:
-links.append(i.get_attribute('href'))
-
-df_ml = pd.DataFrame({'Title' : title , 'Location' : location ,'URLs':links ,'Company_Name' : company_name ,'post_time':post_time})
-
-
-
-
-# GET DESCRIPTION AND LOGO
-def all_description_LOGO(urls):
-description =[]
-LOGO =[]
-for link in urls:
-driver = webdriver.Chrome('chromedriver',options=options)
-driver.get(link)
-options.add_argument("window-size=1200x600")
-WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="main-content"]/section[1]/div/div[1]/section[1]/div/div/section/button[1]'))).click()
-qqq= 4+444*58/7+65
-K = driver.find_element(By.XPATH,'//*[@id="main-content"]/section[1]/div/section[2]/div/a/img')
-LOGO.append(K.get_attribute('src'))
-time.sleep(3)
-t = driver.find_element(By.XPATH ,'//*[@id="main-content"]/section[1]/div/div[1]/section[1]/div/div/section/div')
-t_reverse=t.text[::-1]
-
-if t_reverse[:9] =="erom wohs":
-l = len(t.text)
-strings=t.text[:l-9].split("\n")
-strings[:] = [x for x in strings if x]
-description.append(strings)
-else:
-strings=t.text.split("\n")
-strings[:] = [x for x in strings if x]
-description.append(strings)
-df_ml = pd.DataFrame({'all_about_job' : description ,'company_logo':LOGO})
-
-return df_ml
-
-# apply desc. and logo function
-E = all_description_LOGO(links)
-
-# other info function
-def other(urls):
-frames =[]
-for url in urls:
-data1 = requests.get(url)
-soup1 = BeautifulSoup(data1.content)
-j = soup1.find('ul' , {'class': 'description__job-criteria-list'})
-time.sleep(4)
-jj=j.find_all('h3')
-dic ={}
-for i in range(len(jj)):
-dic[jj[i].text.replace('\n',' ').strip()] = j.find_all('span')[i].text.replace('\n',' ').strip()
-output = pd.DataFrame()
-output = output.append(dic, ignore_index=True)
-frames.append(output)
-result = pd.concat(frames)
-return result
-
-# apply Other function
-df = other(links)
-df.fillna('Not_Found',inplace= True)
-df.reset_index(inplace=True, drop=True)
-
-# combine all together
-result = pd.concat([df_ml,E, df ], axis=1)
-
-return result
-
-
-##################### map_bubble #####################
-
-#### function to show map for loaction of the job
-
-
-
-def map_bubble(df):
-
-import requests
-import urllib.parse
-g =[]
-for i in range(len(df.Location)):
-
-if df.Location.loc[i].split(","):
-g.append(df.Location.loc[i].split(",")[0])
-else:
-g.append(df.Location.loc[i])
-df['new_loc']=g
-if 'country' in df.columns:
-df["full_location"] = df["new_loc"] + ", " +df["country"]
-dict_cities = dict(df.full_location.value_counts())
-else :
-dict_cities = dict(df.new_loc.value_counts())
-lat = []
-lon = []
-bubble_df = pd.DataFrame()
-add=[]
-val=[]
-try:
-for address in dict_cities.keys():
-url = 'https://nominatim.openstreetmap.org/search/' + urllib.parse.quote(address) +'?format=json'
-
-response = requests.get(url).json()
-lat.append(response[0]["lat"])
-lon.append(response[0]["lon"])
-add.append(address)
-val.append(dict_cities[address])
-except:
-pass
-
-bubble_df['address'] =add
-bubble_df['lat'] = lat
-bubble_df['lon'] = lon
-bubble_df['value'] = val
-
-
-# import the library
-import folium
-
-# Make an empty map
-m = folium.Map(location=[20,0], tiles="OpenStreetMap", zoom_start=2)
-# add marker one by one on the map
-for i in range(0,len(bubble_df)):
-folium.Circle(
-location=[bubble_df.iloc[i]['lat'], bubble_df.iloc[i]['lon']],
-
-popup=bubble_df.iloc[i][['address','value']].values,
-radius=float(bubble_df.iloc[i]['value'])*500,
-color='#69b3a2',
-fill=True,
-fill_color='#69b3a2'
-).add_to(m)
-m
-# Show the map again
-return m
-
-
-##########################
-
-
-
-
-
-#########################
-#### wuzzuf analysis
-def wuzzuf_exp(df1):
-top10_job_title = df1['Title'].value_counts()[:10]
-fig1 = px.bar(y=top10_job_title.values,
-x=top10_job_title.index,
-color = top10_job_title.index,
-color_discrete_sequence=px.colors.sequential.deep,
-text=top10_job_title.values,
-title= 'Top 10 Job Titles',
-template= 'plotly_dark')
-fig1.update_layout(height=500,width=500,
-xaxis_title="Job Titles",
-yaxis_title="count",
-font = dict(size=17,family="Franklin Gothic"))
-st.plotly_chart(fig1)
-
-type_grouped = df1['Career_Level'].value_counts()
-#e_type = ['Full-Time','Part-Time','Contract','Freelance']
-e_type =dict(df1['Career_Level'].value_counts()).keys()
-fig2 = px.bar(x = e_type, y = type_grouped.values,
-color = type_grouped.index,
-color_discrete_sequence=px.colors.sequential.dense,
-template = 'plotly_dark',
-text = type_grouped.values, title = 'Career Level Distribution')
-fig2.update_layout( height=500, width=500,
-xaxis_title="Career Level",
-yaxis_title="count",
-font = dict(size=17,family="Franklin Gothic"))
-fig2.update_traces(width=0.5)
-st.plotly_chart(fig2)
-residence = df1['Location'].value_counts()
-top10_employee_location = residence[:10]
-fig3 = px.bar(y=top10_employee_location.values,
-x=top10_employee_location.index,
-color = top10_employee_location.index,
-color_discrete_sequence=px.colors.sequential.deep,
-text=top10_employee_location.values,
-title= 'Top 10 Location of job',
-template= 'plotly_dark')
-fig3.update_layout(height=500,width=500,
-xaxis_title="Location of job",
-yaxis_title="count",
-font = dict(size=17,family="Franklin Gothic"))
-st.plotly_chart(fig3)
-
-type_grouped = df1['Experience_Needed'].value_counts()
-#e_type = ['Full-Time','Part-Time','Contract','Freelance']
-e_type =dict(df1['Experience_Needed'].value_counts()).keys()
-fig4 = px.bar(x = e_type, y = type_grouped.values,
-color = type_grouped.index,
-color_discrete_sequence=px.colors.sequential.dense,
-template = 'plotly_dark',
-text = type_grouped.values, title = ' Experience Level Distribution')
-fig4.update_layout(height=500,width=500,
-xaxis_title=" Experience Level (years)",
-yaxis_title="count",
-font = dict(size=17,family="Franklin Gothic"))
-fig4.update_traces(width=0.5)
-st.plotly_chart(fig4)
-return
-
-
-
-#########################
-### linkedin analysis
-
-def linkedin_exp(df1):
-top10_job_title = df1['Title'].value_counts()[:10]
-fig1 = px.bar(y=top10_job_title.values,
-x=top10_job_title.index,
-color = top10_job_title.index,
-color_discrete_sequence=px.colors.sequential.deep,
-text=top10_job_title.values,
-title= 'Top 10 Job Titles',
-template= 'plotly_dark')
-fig1.update_layout(height=500,width=500,
-xaxis_title="Job Titles",
-yaxis_title="count",
-font = dict(size=17,family="Franklin Gothic"))
-st.plotly_chart(fig1)
-
-type_grouped = df1['Employment type'].value_counts()
-#e_type = ['Full-Time','Part-Time','Contract','Freelance']
-e_type =dict(df1['Employment type'].value_counts()).keys()
-fig2 = px.bar(x = e_type, y = type_grouped.values,
-color = type_grouped.index,
-color_discrete_sequence=px.colors.sequential.dense,
-template = 'plotly_dark',
-text = type_grouped.values, title = 'Employment type Distribution')
-fig2.update_layout( height=500, width=500,
-xaxis_title="Employment type",
-yaxis_title="count",
-font = dict(size=17,family="Franklin Gothic"))
-fig2.update_traces(width=0.5)
-st.plotly_chart(fig2)
-residence = df1['Location'].value_counts()
-top10_employee_location = residence[:10]
-fig3 = px.bar(y=top10_employee_location.values,
-x=top10_employee_location.index,
-color = top10_employee_location.index,
-color_discrete_sequence=px.colors.sequential.deep,
-text=top10_employee_location.values,
-title= 'Top 10 Location of job',
-template= 'plotly_dark')
-fig3.update_layout(height=500,width=500,
-xaxis_title="Location of job",
-yaxis_title="count",
-font = dict(size=17,family="Franklin Gothic"))
-st.plotly_chart(fig3)
-
-type_grouped = df1['Seniority level'].value_counts()
-#e_type = ['Full-Time','Part-Time','Contract','Freelance']
-e_type =dict(df1['Seniority level'].value_counts()).keys()
-fig4 = px.bar(x = e_type, y = type_grouped.values,
-color = type_grouped.index,
-color_discrete_sequence=px.colors.sequential.dense,
-template = 'plotly_dark',
-text = type_grouped.values, title = 'Seniority level Distribution')
-fig4.update_layout(height=500,width=500,
-xaxis_title="Seniority level",
-yaxis_title="count",
-font = dict(size=17,family="Franklin Gothic"))
-fig4.update_traces(width=0.5)
-st.plotly_chart(fig4)
-return
-
-
-########################
+from wuzzuf_scraper import Wuzzuf_scrapping
+from linkedin_scraper import LINKEDIN_Scrapping
+from data_analysis import map_bubble,linkedin_exp,wuzzuf_exp

####################### stream lit app ################################

-#site = ""
-#job =""
-#num_jobs = 0

st.set_page_config(page_title="My Web_Scrap Page", page_icon=":tada:", layout="wide")

@@ -519,7 +63,7 @@ with st.container():
)
st.write("[Reach me >](https://www.linkedin.com/in/yassmen-youssef-48439a166/)")
with right_column:
-
+st.image("im.gif", use_column_width=True)
# st_lottie(lottie_coding, height=300, key="coding")


@@ -556,7 +100,7 @@ if st.sidebar.button('Start Scrapping'):
st.write("---")
tab1, tab2 ,tab3= st.tabs([" Data", " Bubble Map","Data Exploration"])
with tab1 :
-with
+with st.spinner('✨Now loading...' ):
time.sleep(5)
n1 = Wuzzuf_scrapping(job ,num_jobs )
try:
@@ -578,7 +122,7 @@ if st.sidebar.button('Start Scrapping'):
st.write("---")
tab1, tab2 ,tab3= st.tabs([" Data", " Bubble Map","Data Exploration"])
with tab1 :
-with
+with st.spinner('✨Now loading...' ):
time.sleep(5)
n1 = LINKEDIN_Scrapping(job ,num_jobs )
try:
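
Note on the last two hunks: both tabs now wrap their scraping call in st.spinner, Streamlit's context manager that keeps a transient "working" message on screen while a slow block runs. A minimal, self-contained sketch of the same pattern; fake_scrape here is a placeholder, not one of the app's real scrapers:

import time
import streamlit as st

def fake_scrape(query, n):
    # Stand-in for Wuzzuf_scrapping / LINKEDIN_Scrapping: just simulates slow work.
    time.sleep(2)
    return ["{} job {}".format(query, i) for i in range(n)]

with st.spinner("✨Now loading..."):
    # The spinner stays visible until this block exits.
    rows = fake_scrape("data analyst", 5)
st.write(rows)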
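
Taken as a whole, the commit moves the Wuzzuf scraper, the LinkedIn scraper, and the map/plotting helpers out of app.py and re-imports them from three new modules. A rough sketch of the module layout those import lines imply; the bodies are the blocks deleted above, and the exact Wuzzuf_scrapping signature is an assumption, since its def line falls outside the hunk:

# wuzzuf_scraper.py (assumed layout) -- Wuzzuf scraping logic removed above
def Wuzzuf_scrapping(job_type, job_num):
    """Scrape Wuzzuf search results into a DataFrame and export them to Excel."""
    ...

# linkedin_scraper.py (assumed layout) -- LinkedIn scraping logic removed above
def LINKEDIN_Scrapping(job_search, num_jobs):
    """Scrape LinkedIn job cards, descriptions, and logos into a combined DataFrame."""
    ...

# data_analysis.py (assumed layout) -- visualization helpers removed above
def map_bubble(df):
    """Geocode job locations via Nominatim and draw a folium bubble map."""
    ...

def wuzzuf_exp(df1):
    """Plotly bar charts summarizing the Wuzzuf results."""
    ...

def linkedin_exp(df1):
    """Plotly bar charts summarizing the LinkedIn results."""
    ...

With that split in place, app.py only needs the three import lines added in the second hunk.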