Yassmen committed on
Commit
6343888
1 Parent(s): 6d228e9

Create data_analysis.py

Browse files
Files changed (1) hide show
  1. data_analysis.py +208 -0
data_analysis.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### function to show a map of the job locations
2
+ import time
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import matplotlib as mpl
6
+ import plotly
7
+ import plotly.express as px
8
+ import plotly.graph_objs as go
9
+ import plotly.offline as py
10
+ from plotly.offline import iplot
11
+ from plotly.subplots import make_subplots
12
+ import plotly.figure_factory as ff
13
+
14
+
15
+
16
def map_bubble(df):
    """Build a folium world map with one circle per job location.

    The city part of every ``Location`` value (the text before the first
    comma) is written to a new ``new_loc`` column of *df*.  When *df* has a
    ``country`` column, a ``full_location`` column ("<city>, <country>") is
    added as well and used as the address to look up.  Each distinct address
    is geocoded through the public Nominatim search API and drawn as a circle
    whose radius is proportional to the number of rows with that address.

    Args:
        df: pandas DataFrame with a string ``Location`` column and an
            optional ``country`` column.  It is modified in place: the
            helper columns described above are added to it.

    Returns:
        folium.Map: the populated map.  Addresses that cannot be geocoded
        are logged and skipped; they do not abort the remaining look-ups.
    """
    import logging

    import folium
    import requests

    logger = logging.getLogger(__name__)
    search_url = "https://nominatim.openstreetmap.org/search"
    # Nominatim's usage policy asks for an identifying User-Agent.
    # NOTE(review): replace with the real application name/contact.
    headers = {"User-Agent": "job-location-map/1.0"}

    # Vectorised split: independent of the index labels and tolerant of
    # missing values (they stay NaN and are ignored by value_counts()).
    df["new_loc"] = df["Location"].str.split(",").str[0]
    if "country" in df.columns:
        df["full_location"] = df["new_loc"] + ", " + df["country"]
        counts = df["full_location"].value_counts()
    else:
        counts = df["new_loc"].value_counts()

    # Geocode each address on its own so a single failure only drops that
    # address instead of every address that follows it.
    # NOTE(review): the public Nominatim instance is rate limited; consider
    # throttling or caching when there are many distinct addresses.
    bubbles = []
    for address, count in counts.items():
        try:
            response = requests.get(
                search_url,
                params={"q": address, "format": "json"},
                headers=headers,
                timeout=10,
            )
            response.raise_for_status()
            best_hit = response.json()[0]
            latitude = float(best_hit["lat"])
            longitude = float(best_hit["lon"])
        except (requests.RequestException, ValueError, LookupError, TypeError) as err:
            # Network/HTTP error, invalid JSON, no result, or malformed hit.
            logger.warning("Could not geocode %r: %s", address, err)
            continue
        bubbles.append((address, latitude, longitude, count))

    # Start from an empty world map and add one circle per geocoded address.
    m = folium.Map(location=[20, 0], tiles="OpenStreetMap", zoom_start=2)
    for address, latitude, longitude, count in bubbles:
        folium.Circle(
            location=[latitude, longitude],
            popup=f"{address}: {count}",
            radius=float(count) * 500,
            color='#69b3a2',
            fill=True,
            fill_color='#69b3a2',
        ).add_to(m)
    return m
75
+
76
+
77
+ ##########################
78
+
79
+
80
+
81
+
82
+
83
+ #########################
84
+ #### wuzzuf analysis
85
def wuzzuf_exp(df1):
    """Render the four Wuzzuf overview bar charts.

    Charts are sent to ``st.plotly_chart`` in this order: top 10 values of
    ``Title``, distribution of ``Career_Level``, top 10 values of
    ``Location``, distribution of ``Experience_Needed``.

    Args:
        df1: DataFrame providing the four columns named above.

    Returns:
        None.
    """
    # NOTE(review): `st` is not imported in the visible file; presumably it
    # is streamlit (`import streamlit as st`) - confirm and add the import.

    def show_top10(column, chart_title, axis_title):
        # Bar chart of the ten most frequent values of `column`.
        leaders = df1[column].value_counts()[:10]
        chart = px.bar(
            x=leaders.index,
            y=leaders.values,
            color=leaders.index,
            text=leaders.values,
            color_discrete_sequence=px.colors.sequential.deep,
            title=chart_title,
            template='plotly_dark',
        )
        chart.update_layout(
            height=500,
            width=500,
            xaxis_title=axis_title,
            yaxis_title="count",
            font=dict(size=17, family="Franklin Gothic"),
        )
        st.plotly_chart(chart)

    def show_distribution(column, chart_title, axis_title):
        # Bar chart of every distinct value of `column`, with narrower bars.
        tally = df1[column].value_counts()
        chart = px.bar(
            x=dict(tally).keys(),
            y=tally.values,
            color=tally.index,
            text=tally.values,
            color_discrete_sequence=px.colors.sequential.dense,
            title=chart_title,
            template='plotly_dark',
        )
        chart.update_layout(
            height=500,
            width=500,
            xaxis_title=axis_title,
            yaxis_title="count",
            font=dict(size=17, family="Franklin Gothic"),
        )
        chart.update_traces(width=0.5)
        st.plotly_chart(chart)

    show_top10('Title', 'Top 10 Job Titles', "Job Titles")
    show_distribution('Career_Level', 'Career Level Distribution', "Career Level")
    show_top10('Location', 'Top 10 Location of job', "Location of job")
    show_distribution(
        'Experience_Needed',
        ' Experience Level Distribution',
        " Experience Level (years)",
    )
    return
144
+
145
+
146
+
147
+ #########################
148
+ ### linkedin analysis
149
+
150
def linkedin_exp(df1):
    """Render the four LinkedIn overview bar charts.

    Charts are sent to ``st.plotly_chart`` in this order: top 10 values of
    ``Title``, distribution of ``Employment type``, top 10 values of
    ``Location``, distribution of ``Seniority level``.

    Args:
        df1: DataFrame providing the four columns named above.

    Returns:
        None.
    """
    # NOTE(review): `st` is not imported in the visible file; presumably it
    # is streamlit (`import streamlit as st`) - confirm and add the import.

    def show_top10(column, chart_title, axis_title):
        # Bar chart of the ten most frequent values of `column`.
        leaders = df1[column].value_counts()[:10]
        chart = px.bar(
            x=leaders.index,
            y=leaders.values,
            color=leaders.index,
            text=leaders.values,
            color_discrete_sequence=px.colors.sequential.deep,
            title=chart_title,
            template='plotly_dark',
        )
        chart.update_layout(
            height=500,
            width=500,
            xaxis_title=axis_title,
            yaxis_title="count",
            font=dict(size=17, family="Franklin Gothic"),
        )
        st.plotly_chart(chart)

    def show_distribution(column, chart_title, axis_title):
        # Bar chart of every distinct value of `column`, with narrower bars.
        tally = df1[column].value_counts()
        chart = px.bar(
            x=dict(tally).keys(),
            y=tally.values,
            color=tally.index,
            text=tally.values,
            color_discrete_sequence=px.colors.sequential.dense,
            title=chart_title,
            template='plotly_dark',
        )
        chart.update_layout(
            height=500,
            width=500,
            xaxis_title=axis_title,
            yaxis_title="count",
            font=dict(size=17, family="Franklin Gothic"),
        )
        chart.update_traces(width=0.5)
        st.plotly_chart(chart)

    show_top10('Title', 'Top 10 Job Titles', "Job Titles")
    show_distribution(
        'Employment type',
        'Employment type Distribution',
        "Employment type",
    )
    show_top10('Location', 'Top 10 Location of job', "Location of job")
    show_distribution(
        'Seniority level',
        'Seniority level Distribution',
        "Seniority level",
    )
    return