Job.web.scrapping / data_analysis.py
Yassmen's picture
Update data_analysis.py
c6e3370 verified
#### Function to show a map of the job locations
import time
import matplotlib.pyplot as plt
import streamlit as st
import seaborn as sns
import matplotlib as mpl
import plotly
import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import iplot
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import pandas as pd
_NOMINATIM_SEARCH_URL = "https://nominatim.openstreetmap.org/search"
# Nominatim's usage policy asks clients to identify themselves; the default
# User-Agent sent by HTTP libraries may be rejected.
_NOMINATIM_HEADERS = {"User-Agent": "job-web-scraping-data-analysis/1.0"}
_NOMINATIM_TIMEOUT_S = 10


def _geocode(address):
    """Look up *address* on Nominatim and return ``(lat, lon)`` as floats.

    Returns ``None`` when the request fails, the response is not valid JSON,
    or the service has no match, so the caller can skip that address.
    """
    import requests

    try:
        response = requests.get(
            _NOMINATIM_SEARCH_URL,
            # Only the best match is used, so ask for a single result.
            params={"q": address, "format": "json", "limit": 1},
            headers=_NOMINATIM_HEADERS,
            timeout=_NOMINATIM_TIMEOUT_S,
        )
        response.raise_for_status()
        best_match = response.json()[0]
        return float(best_match["lat"]), float(best_match["lon"])
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Network/HTTP error, malformed JSON, empty result list or a result
        # without usable coordinates: treat all of them as "not found".
        return None


def map_bubble(df):
    """Build a folium world map with one circle per distinct job location.

    The text before the first comma of every ``Location`` value is stored in
    a new ``new_loc`` column of *df*. When *df* has a ``country`` column, a
    ``full_location`` column (``"<new_loc>, <country>"``) is added as well and
    used as the lookup key. Each distinct key is geocoded through Nominatim
    and drawn as a circle whose radius is ``count * 500``.

    Addresses that cannot be geocoded are skipped individually; they do not
    prevent the remaining addresses from being drawn.

    NOTE: one HTTP request is sent per distinct location. Nominatim's public
    instance limits request rates, so very large inputs may be throttled.

    Args:
        df: DataFrame with a ``Location`` column and optionally a ``country``
            column. It is modified in place (columns added as described).

    Returns:
        The ``folium.Map`` holding the circles.
    """
    import folium

    # Missing (non-string) locations are passed through unchanged;
    # value_counts() below ignores NaN entries.
    df["new_loc"] = [
        loc.split(",")[0] if isinstance(loc, str) else loc
        for loc in df["Location"]
    ]
    if "country" in df.columns:
        df["full_location"] = df["new_loc"] + ", " + df["country"]
        location_counts = df["full_location"].value_counts()
    else:
        location_counts = df["new_loc"].value_counts()

    rows = []
    for address, count in location_counts.items():
        coords = _geocode(address)
        if coords is None:
            continue
        rows.append((address, coords[0], coords[1], count))
    bubble_df = pd.DataFrame(rows, columns=["address", "lat", "lon", "value"])

    # Start from a zoomed-out world view and add the circles one by one.
    m = folium.Map(location=[20, 0], tiles="OpenStreetMap", zoom_start=2)
    for i in range(len(bubble_df)):
        row = bubble_df.iloc[i]
        folium.Circle(
            location=[row["lat"], row["lon"]],
            popup=row[["address", "value"]].values,
            radius=float(row["value"]) * 500,
            color="#69b3a2",
            fill=True,
            fill_color="#69b3a2",
        ).add_to(m)
    return m
##########################
#########################
#### wuzzuf analysis
def _show_count_bar(counts, title, xaxis_title, palette, bar_width=None):
    """Render a value-count Series as a dark-themed bar chart in Streamlit.

    Args:
        counts: Series as returned by ``value_counts()``; its index supplies
            the x categories and bar colours, its values the bar heights and
            the text labels.
        title: Chart title.
        xaxis_title: Label of the x axis (the y axis is always "count").
        palette: Colour sequence passed as ``color_discrete_sequence``.
        bar_width: Optional bar width applied to every trace; when ``None``
            Plotly's default width is kept.
    """
    fig = px.bar(
        x=counts.index,
        y=counts.values,
        color=counts.index,
        color_discrete_sequence=palette,
        text=counts.values,
        title=title,
        template="plotly_dark",
    )
    fig.update_layout(
        height=500,
        width=500,
        xaxis_title=xaxis_title,
        yaxis_title="count",
        font=dict(size=17, family="Franklin Gothic"),
    )
    if bar_width is not None:
        fig.update_traces(width=bar_width)
    st.plotly_chart(fig)


def wuzzuf_exp(df1):
    """Show the four summary bar charts for a Wuzzuf job DataFrame.

    Charts, in order: top 10 job titles, career level distribution, top 10
    job locations and experience level distribution.

    Args:
        df1: DataFrame with the columns ``Title``, ``Career_Level``,
            ``Location`` and ``Experience_Needed``.

    Raises:
        KeyError: If one of the columns is missing (charts preceding the
            missing column have already been rendered at that point).
    """
    deep = px.colors.sequential.deep
    dense = px.colors.sequential.dense

    _show_count_bar(
        df1["Title"].value_counts().head(10),
        "Top 10 Job Titles",
        "Job Titles",
        deep,
    )
    _show_count_bar(
        df1["Career_Level"].value_counts(),
        "Career Level Distribution",
        "Career Level",
        dense,
        bar_width=0.5,
    )
    _show_count_bar(
        df1["Location"].value_counts().head(10),
        "Top 10 Location of job",
        "Location of job",
        deep,
    )
    _show_count_bar(
        df1["Experience_Needed"].value_counts(),
        " Experience Level Distribution",
        " Experience Level (years)",
        dense,
        bar_width=0.5,
    )


#########################
### linkedin analysis
def linkedin_exp(df1):
    """Show the four summary bar charts for a LinkedIn job DataFrame.

    Charts, in order: top 10 job titles, employment type distribution, top 10
    job locations and seniority level distribution.

    Args:
        df1: DataFrame with the columns ``Title``, ``Employment type``,
            ``Location`` and ``Seniority level``.

    Raises:
        KeyError: If one of the columns is missing (charts preceding the
            missing column have already been rendered at that point).
    """
    deep = px.colors.sequential.deep
    dense = px.colors.sequential.dense

    _show_count_bar(
        df1["Title"].value_counts().head(10),
        "Top 10 Job Titles",
        "Job Titles",
        deep,
    )
    _show_count_bar(
        df1["Employment type"].value_counts(),
        "Employment type Distribution",
        "Employment type",
        dense,
        bar_width=0.5,
    )
    _show_count_bar(
        df1["Location"].value_counts().head(10),
        "Top 10 Location of job",
        "Location of job",
        deep,
    )
    _show_count_bar(
        df1["Seniority level"].value_counts(),
        "Seniority level Distribution",
        "Seniority level",
        dense,
        bar_width=0.5,
    )