Spaces:

Yassmen
/

Job.web.scrapping

Sleeping

App Files Files Community

Job.web.scrapping / data_analysis.py

Yassmen

Update data_analysis.py

c6e3370 verified 21 days ago

raw

history blame contribute delete

7.12 kB

	#### function to show a map of the job locations
	import time
	import matplotlib.pyplot as plt
	import streamlit as st
	import seaborn as sns
	import matplotlib as mpl
	import plotly
	import plotly.express as px
	import plotly.graph_objs as go
	import plotly.offline as py
	from plotly.offline import iplot
	from plotly.subplots import make_subplots
	import plotly.figure_factory as ff

	import pandas as pd

	def _geocode_address(address, timeout=10):
	    """Look up one address with the public Nominatim search API.

	    Args:
	        address: Free-form place name, e.g. ``"Cairo, Egypt"``.
	        timeout: Seconds to wait for the HTTP response.

	    Returns:
	        A ``(lat, lon)`` tuple of floats for the first match, or ``None``
	        when the request fails or no usable match is returned.
	    """
	    import requests

	    try:
	        response = requests.get(
	            "https://nominatim.openstreetmap.org/search",
	            # Query-string form; let requests do the URL encoding.
	            params={"q": address, "format": "json", "limit": 1},
	            # Nominatim's usage policy asks clients to identify themselves.
	            headers={"User-Agent": "job-web-scrapping-streamlit-app"},
	            timeout=timeout,
	        )
	        response.raise_for_status()
	        best_match = response.json()[0]
	        return float(best_match["lat"]), float(best_match["lon"])
	    except (requests.RequestException, ValueError, LookupError, TypeError):
	        # Network error, non-JSON body, empty result list or malformed entry:
	        # treat all of them as "address could not be located".
	        return None


	def map_bubble(df):
	    """Build a folium world map with one circle per job location.

	    The text before the first comma of every ``Location`` value is stored in
	    a new ``new_loc`` column of ``df``. When ``df`` has a ``country`` column,
	    a ``full_location`` column (``"<new_loc>, <country>"``) is added as well
	    and used as the geocoding key. Each distinct key is geocoded through
	    Nominatim and drawn as a circle whose radius is proportional to how many
	    rows share that key. Keys that cannot be geocoded are skipped
	    individually instead of aborting the remaining lookups.

	    Args:
	        df: DataFrame with a string ``Location`` column and, optionally, a
	            ``country`` column. It is modified in place (columns added).

	    Returns:
	        The ``folium.Map`` carrying the circles.
	    """
	    import folium

	    # Vectorised split: independent of the DataFrame index and tolerant of
	    # missing values (they stay NaN and are ignored by value_counts below).
	    df['new_loc'] = df['Location'].str.split(",").str[0]

	    if 'country' in df.columns:
	        df["full_location"] = df["new_loc"] + ", " + df["country"]
	        location_counts = df["full_location"].value_counts()
	    else:
	        location_counts = df["new_loc"].value_counts()

	    # NOTE: the public Nominatim server is rate limited by its usage policy;
	    # consider caching results if the number of distinct locations grows.
	    located = []
	    for address, count in location_counts.items():
	        coordinates = _geocode_address(address)
	        if coordinates is None:
	            continue
	        located.append((address, coordinates, int(count)))

	    # Start from a zoomed-out world view and add one circle per location.
	    m = folium.Map(location=[20, 0], tiles="OpenStreetMap", zoom_start=2)
	    for address, (lat, lon), count in located:
	        folium.Circle(
	            location=[lat, lon],
	            popup=f"{address}: {count}",
	            # Radius is in metres; scale the job count so circles are visible.
	            radius=float(count) * 500,
	            color='#69b3a2',
	            fill=True,
	            fill_color='#69b3a2',
	        ).add_to(m)
	    return m


	##########################





	#########################
	#### wuzzuf analysis
	def wuzzuf_exp(df1):
	    """Show four Plotly bar charts for a Wuzzuf job table in Streamlit.

	    Charts are rendered in this order with ``st.plotly_chart``:
	    the first 10 entries of the ``Title`` value counts, all
	    ``Career_Level`` value counts, the first 10 entries of the ``Location``
	    value counts and all ``Experience_Needed`` value counts.

	    Args:
	        df1: DataFrame providing the ``Title``, ``Career_Level``,
	            ``Location`` and ``Experience_Needed`` columns.

	    Returns:
	        None.
	    """

	    def show_bar(counts, x_values, palette, chart_title, x_label, narrow_bars):
	        # One bar per category, coloured by category and labelled with its count.
	        figure = px.bar(
	            x=x_values,
	            y=counts.values,
	            color=counts.index,
	            color_discrete_sequence=palette,
	            text=counts.values,
	            title=chart_title,
	            template='plotly_dark',
	        )
	        figure.update_layout(
	            height=500,
	            width=500,
	            xaxis_title=x_label,
	            yaxis_title="count",
	            font=dict(size=17, family="Franklin Gothic"),
	        )
	        if narrow_bars:
	            # The distribution charts use bars half a category wide.
	            figure.update_traces(width=0.5)
	        st.plotly_chart(figure)

	    title_counts = df1['Title'].value_counts()[:10]
	    show_bar(
	        title_counts,
	        title_counts.index,
	        px.colors.sequential.deep,
	        'Top 10 Job Titles',
	        "Job Titles",
	        False,
	    )

	    level_counts = df1['Career_Level'].value_counts()
	    show_bar(
	        level_counts,
	        dict(level_counts).keys(),
	        px.colors.sequential.dense,
	        'Career Level Distribution',
	        "Career Level",
	        True,
	    )

	    location_counts = df1['Location'].value_counts()[:10]
	    show_bar(
	        location_counts,
	        location_counts.index,
	        px.colors.sequential.deep,
	        'Top 10 Location of job',
	        "Location of job",
	        False,
	    )

	    experience_counts = df1['Experience_Needed'].value_counts()
	    show_bar(
	        experience_counts,
	        dict(experience_counts).keys(),
	        px.colors.sequential.dense,
	        ' Experience Level Distribution',
	        " Experience Level (years)",
	        True,
	    )
	    return None



	#########################
	### linkedin analysis

	def linkedin_exp(df1):
	    """Show four Plotly bar charts for a LinkedIn job table in Streamlit.

	    Charts are rendered in this order with ``st.plotly_chart``:
	    the first 10 entries of the ``Title`` value counts, all
	    ``Employment type`` value counts, the first 10 entries of the
	    ``Location`` value counts and all ``Seniority level`` value counts.

	    Args:
	        df1: DataFrame providing the ``Title``, ``Employment type``,
	            ``Location`` and ``Seniority level`` columns.

	    Returns:
	        None.
	    """
	    # (column, keep only the first 10 counts?, chart title, x-axis title)
	    chart_specs = (
	        ('Title', True, 'Top 10 Job Titles', "Job Titles"),
	        ('Employment type', False, 'Employment type Distribution', "Employment type"),
	        ('Location', True, 'Top 10 Location of job', "Location of job"),
	        ('Seniority level', False, 'Seniority level Distribution', "Seniority level"),
	    )

	    for column, top_ten_only, chart_title, x_label in chart_specs:
	        counts = df1[column].value_counts()
	        if top_ten_only:
	            counts = counts[:10]
	            x_values = counts.index
	            palette = px.colors.sequential.deep
	        else:
	            x_values = dict(counts).keys()
	            palette = px.colors.sequential.dense

	        figure = px.bar(
	            x=x_values,
	            y=counts.values,
	            color=counts.index,
	            color_discrete_sequence=palette,
	            text=counts.values,
	            title=chart_title,
	            template='plotly_dark',
	        )
	        figure.update_layout(
	            height=500,
	            width=500,
	            xaxis_title=x_label,
	            yaxis_title="count",
	            font=dict(size=17, family="Franklin Gothic"),
	        )
	        if not top_ten_only:
	            # Full distributions are drawn with bars half a category wide.
	            figure.update_traces(width=0.5)
	        st.plotly_chart(figure)
	    return None