Tryfonas committed on
Commit
85cd1e1
·
verified ·
1 Parent(s): e962e08

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ kiva_loans.csv filter=lfs diff=lfs merge=lfs -text
37
+ loan_theme_ids.csv filter=lfs diff=lfs merge=lfs -text
KIVA___BDS24_Assignment_Karmiris_Tryfonas.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
app - Copy.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import altair as alt
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+
8
+ # Function to load the dataset
9
+ @st.cache_data # Cache the function to enhance performance
10
+ def load_data():
11
+ # Define the file path
12
+ file_path = 'https://raw.githubusercontent.com/aaubs/ds-master/main/apps/M1-attrition-streamlit/HR-Employee-Attrition-synth.csv'
13
+
14
+ # Load the CSV file into a pandas dataframe
15
+ df = pd.read_csv(file_path)
16
+
17
+ # Create age groups and add as a new column
18
+ bin_edges = [18, 25, 35, 45, 60]
19
+ bin_labels = ['18-24', '25-34', '35-44', '45-60']
20
+ df['AgeGroup'] = pd.cut(df['Age'], bins=bin_edges, labels=bin_labels, right=False)
21
+
22
+ return df
23
+
24
+ # Load the data using the defined function
25
+ df = load_data()
26
+
27
+ # Set the app title and sidebar header
28
+ st.title("Employee Attrition Dashboard 😊📈")
29
+ st.sidebar.header("Filters 📊")
30
+
31
+ # Introduction
32
+
33
+ # HR Attrition Dashboard
34
+
35
+ st.markdown("""
36
+ Welcome to the HR Attrition Dashboard. In the backdrop of rising employee turnovers, HR departments are stressing the significance of predicting and understanding employee departures. Through the lens of data analytics, this dashboard unveils the deeper causes of employee churn and proposes strategies to boost employee retention.
37
+ """)
38
+ with st.expander("📊 **Objective**"):
39
+ st.markdown("""
40
+ At the heart of this dashboard is the mission to visually decode data, equipping HR experts with insights to tackle these queries:
41
+ - Which company factions face a greater likelihood of employee exits?
42
+ - What might be pushing these individuals to part ways?
43
+ - Observing the discerned trends, what incentives might hold the key to decreasing the attrition rate?
44
+ """
45
+ )
46
+
47
+ # Tutorial Expander
48
+ with st.expander("How to Use the Dashboard 📚"):
49
+ st.markdown("""
50
+ 1. **Filter Data** - Use the sidebar filters to narrow down specific data sets.
51
+ 2. **Visualize Data** - From the dropdown, select a visualization type to view patterns.
52
+ 3. **Insights & Recommendations** - Scroll down to see insights derived from the visualizations and actionable recommendations.
53
+ """)
54
+
55
+
56
+ # Sidebar filter: Age Group
57
+ selected_age_group = st.sidebar.multiselect("Select Age Groups 🕰️", df['AgeGroup'].unique().tolist(), default=df['AgeGroup'].unique().tolist())
58
+ if not selected_age_group:
59
+ st.warning("Please select an age group from the sidebar ⚠️")
60
+ st.stop()
61
+ filtered_df = df[df['AgeGroup'].isin(selected_age_group)]
62
+
63
+ # Sidebar filter: Department
64
+ departments = df['Department'].unique().tolist()
65
+ selected_department = st.sidebar.multiselect("Select Departments 🏢", departments, default=departments)
66
+ if not selected_department:
67
+ st.warning("Please select a department from the sidebar ⚠️")
68
+ st.stop()
69
+ filtered_df = filtered_df[filtered_df['Department'].isin(selected_department)]
70
+
71
+ # Sidebar filter: Monthly Income Range
72
+ min_income = int(df['MonthlyIncome'].min())
73
+ max_income = int(df['MonthlyIncome'].max())
74
+ income_range = st.sidebar.slider("Select Monthly Income Range 💰", min_income, max_income, (min_income, max_income))
75
+ filtered_df = filtered_df[(filtered_df['MonthlyIncome'] >= income_range[0]) & (filtered_df['MonthlyIncome'] <= income_range[1])]
76
+
77
+ # Sidebar filter: Job Satisfaction Level
78
+ satisfaction_levels = sorted(df['JobSatisfaction'].unique().tolist())
79
+ selected_satisfaction = st.sidebar.multiselect("Select Job Satisfaction Levels 😊", satisfaction_levels, default=satisfaction_levels)
80
+ if not selected_satisfaction:
81
+ st.warning("Please select a job satisfaction level from the sidebar ⚠️")
82
+ st.stop()
83
+ filtered_df = filtered_df[filtered_df['JobSatisfaction'].isin(selected_satisfaction)]
84
+
85
+ # Displaying the Attrition Analysis header
86
+ st.header("Attrition Analysis 📊")
87
+
88
+ # Dropdown to select the type of visualization
89
+ visualization_option = st.selectbox(
90
+ "Select Visualization 🎨",
91
+ ["Attrition by Age Group",
92
+ "KDE Plot: Distance from Home by Attrition",
93
+ "Attrition by Job Role",
94
+ "Attrition Distribution by Gender",
95
+ "MonthlyRate and DailyRate by JobLevel"]
96
+ )
97
+
98
+ # Visualizations based on user selection
99
+ if visualization_option == "Attrition by Age Group":
100
+ # Bar chart for attrition by age group
101
+ chart = alt.Chart(filtered_df).mark_bar().encode(
102
+ x='AgeGroup',
103
+ y='count()',
104
+ color='Attrition'
105
+ ).properties(
106
+ title='Attrition Rate by Age Group'
107
+ )
108
+ st.altair_chart(chart, use_container_width=True)
109
+
110
+ elif visualization_option == "KDE Plot: Distance from Home by Attrition":
111
+ # KDE plot for Distance from Home based on Attrition
112
+ plt.figure(figsize=(10, 6))
113
+ sns.kdeplot(data=filtered_df, x='DistanceFromHome', hue='Attrition', fill=True, palette='Set2')
114
+ plt.xlabel('Distance From Home')
115
+ plt.ylabel('Density')
116
+ plt.title('KDE Plot of Distance From Home by Attrition')
117
+ st.pyplot(plt)
118
+
119
+ elif visualization_option == "Attrition by Job Role":
120
+ # Bar chart for attrition by job role
121
+ chart = alt.Chart(filtered_df).mark_bar().encode(
122
+ y='JobRole',
123
+ x='count()',
124
+ color='Attrition'
125
+ ).properties(
126
+ title='Attrition by Job Role'
127
+ )
128
+ st.altair_chart(chart, use_container_width=True)
129
+
130
+ elif visualization_option == "Attrition Distribution by Gender":
131
+ # Pie chart for attrition distribution by gender
132
+ pie_chart_data = filtered_df[filtered_df['Attrition'] == 'Yes']['Gender'].value_counts().reset_index()
133
+ pie_chart_data.columns = ['Gender', 'count']
134
+
135
+ chart = alt.Chart(pie_chart_data).mark_arc().encode(
136
+ theta='count:Q',
137
+ color='Gender:N',
138
+ tooltip=['Gender', 'count']
139
+ ).properties(
140
+ title='Attrition Distribution by Gender',
141
+ width=300,
142
+ height=300
143
+ ).project('identity')
144
+ st.altair_chart(chart, use_container_width=True)
145
+
146
+ elif visualization_option == "MonthlyRate and DailyRate by JobLevel":
147
+ # Boxplots for MonthlyRate and DailyRate by JobLevel
148
+ fig, ax = plt.subplots(1, 2, figsize=(15, 7))
149
+
150
+ # MonthlyRate by JobLevel
151
+ sns.boxplot(x="JobLevel", y="MonthlyRate", data=filtered_df, ax=ax[0], hue="JobLevel", palette='Set2', legend=False)
152
+ ax[0].set_title('MonthlyRate by JobLevel')
153
+ ax[0].set_xlabel('Job Level')
154
+ ax[0].set_ylabel('Monthly Rate')
155
+
156
+ # DailyRate by JobLevel
157
+ sns.boxplot(x="JobLevel", y="DailyRate", data=filtered_df, ax=ax[1], hue="JobLevel", palette='Set2', legend=False)
158
+ ax[1].set_title('DailyRate by JobLevel')
159
+ ax[1].set_xlabel('Job Level')
160
+ ax[1].set_ylabel('Daily Rate')
161
+
162
+ plt.tight_layout()
163
+ st.pyplot(fig)
164
+
165
+ # Display dataset overview
166
+ st.header("Dataset Overview")
167
+ st.dataframe(df.describe())
168
+
169
+
170
+ # Insights from Visualization Section Expander
171
+ with st.expander("Insights from Visualization 🧠"):
172
+ st.markdown("""
173
+ 1. **Age Groups & Attrition** - The 'Attrition by Age Group' plot showcases which age brackets face higher attrition.
174
+ 2. **Home Distance's Impact** - The 'KDE Plot: Distance from Home by Attrition' visualizes if being farther away influences leaving tendencies.
175
+ 3. **Roles & Attrition** - 'Attrition by Job Role' reveals which roles might be more attrition-prone.
176
+ 4. **Gender & Attrition** - The pie chart for 'Attrition Distribution by Gender' provides insights into any gender-based patterns.
177
+ 5. **Earnings Patterns** - 'MonthlyRate and DailyRate by JobLevel' boxplots display the compensation distribution across job levels.
178
+ """)
179
+
180
+ # Recommendations Expander
181
+ with st.expander("Recommendations for Action 🌟"):
182
+ st.markdown("""
183
+ - 🎁 **Incentive Programs:** Introduce incentives tailored for groups showing higher attrition tendencies.
184
+ - 🏡 **Remote Work Options:** Providing flexibility, especially for those living farther from the workplace, could reduce attrition.
185
+ - 🚀 **Training & Growth:** Invest in employee development, especially in roles with higher attrition rates.
186
+ - 👫 **Gender Equality:** Foster an environment that supports equal opportunities regardless of gender.
187
+ - 💸 **Compensation Review:** Regularly review and adjust compensation structures to stay competitive and retain talent.
188
+ """)
app.py CHANGED
@@ -4,185 +4,38 @@ import pandas as pd
4
  import altair as alt
5
  import matplotlib.pyplot as plt
6
  import seaborn as sns
 
7
 
8
- # Function to load the dataset
9
- @st.cache_data # Cache the function to enhance performance
10
- def load_data():
11
- # Define the file path
12
- file_path = 'https://raw.githubusercontent.com/aaubs/ds-master/main/apps/M1-attrition-streamlit/HR-Employee-Attrition-synth.csv'
13
-
14
- # Load the CSV file into a pandas dataframe
15
- df = pd.read_csv(file_path)
16
 
17
- # Create age groups and add as a new column
18
- bin_edges = [18, 25, 35, 45, 60]
19
- bin_labels = ['18-24', '25-34', '35-44', '45-60']
20
- df['AgeGroup'] = pd.cut(df['Age'], bins=bin_edges, labels=bin_labels, right=False)
21
 
22
- return df
23
 
24
- # Load the data using the defined function
25
- df = load_data()
26
 
27
- # Set the app title and sidebar header
28
- st.title("Employee Attrition Dashboard 😊📈")
29
- st.sidebar.header("Filters 📊")
30
 
31
- # Introduction
 
32
 
33
- # HR Attrition Dashboard
 
 
34
 
35
- st.markdown("""
36
- Welcome to the HR Attrition Dashboard. In the backdrop of rising employee turnovers, HR departments are stressing the significance of predicting and understanding employee departures. Through the lens of data analytics, this dashboard unveils the deeper causes of employee churn and proposes strategies to boost employee retention.
37
- """)
38
- with st.expander("📊 **Objective**"):
39
- st.markdown("""
40
- At the heart of this dashboard is the mission to visually decode data, equipping HR experts with insights to tackle these queries:
41
- - Which company factions face a greater likelihood of employee exits?
42
- - What might be pushing these individuals to part ways?
43
- - Observing the discerned trends, what incentives might hold the key to decreasing the attrition rate?
44
- """
45
- )
46
-
47
- # Tutorial Expander
48
- with st.expander("How to Use the Dashboard 📚"):
49
- st.markdown("""
50
- 1. **Filter Data** - Use the sidebar filters to narrow down specific data sets.
51
- 2. **Visualize Data** - From the dropdown, select a visualization type to view patterns.
52
- 3. **Insights & Recommendations** - Scroll down to see insights derived from the visualizations and actionable recommendations.
53
- """)
54
 
 
 
55
 
56
- # Sidebar filter: Age Group
57
- selected_age_group = st.sidebar.multiselect("Select Age Groups 🕰️", df['AgeGroup'].unique().tolist(), default=df['AgeGroup'].unique().tolist())
58
- if not selected_age_group:
59
- st.warning("Please select an age group from the sidebar ⚠️")
60
- st.stop()
61
- filtered_df = df[df['AgeGroup'].isin(selected_age_group)]
62
 
63
- # Sidebar filter: Department
64
- departments = df['Department'].unique().tolist()
65
- selected_department = st.sidebar.multiselect("Select Departments 🏢", departments, default=departments)
66
- if not selected_department:
67
- st.warning("Please select a department from the sidebar ⚠️")
68
- st.stop()
69
- filtered_df = filtered_df[filtered_df['Department'].isin(selected_department)]
70
 
71
- # Sidebar filter: Monthly Income Range
72
- min_income = int(df['MonthlyIncome'].min())
73
- max_income = int(df['MonthlyIncome'].max())
74
- income_range = st.sidebar.slider("Select Monthly Income Range 💰", min_income, max_income, (min_income, max_income))
75
- filtered_df = filtered_df[(filtered_df['MonthlyIncome'] >= income_range[0]) & (filtered_df['MonthlyIncome'] <= income_range[1])]
76
-
77
- # Sidebar filter: Job Satisfaction Level
78
- satisfaction_levels = sorted(df['JobSatisfaction'].unique().tolist())
79
- selected_satisfaction = st.sidebar.multiselect("Select Job Satisfaction Levels 😊", satisfaction_levels, default=satisfaction_levels)
80
- if not selected_satisfaction:
81
- st.warning("Please select a job satisfaction level from the sidebar ⚠️")
82
- st.stop()
83
- filtered_df = filtered_df[filtered_df['JobSatisfaction'].isin(selected_satisfaction)]
84
-
85
- # Displaying the Attrition Analysis header
86
- st.header("Attrition Analysis 📊")
87
-
88
- # Dropdown to select the type of visualization
89
- visualization_option = st.selectbox(
90
- "Select Visualization 🎨",
91
- ["Attrition by Age Group",
92
- "KDE Plot: Distance from Home by Attrition",
93
- "Attrition by Job Role",
94
- "Attrition Distribution by Gender",
95
- "MonthlyRate and DailyRate by JobLevel"]
96
- )
97
-
98
- # Visualizations based on user selection
99
- if visualization_option == "Attrition by Age Group":
100
- # Bar chart for attrition by age group
101
- chart = alt.Chart(filtered_df).mark_bar().encode(
102
- x='AgeGroup',
103
- y='count()',
104
- color='Attrition'
105
- ).properties(
106
- title='Attrition Rate by Age Group'
107
- )
108
- st.altair_chart(chart, use_container_width=True)
109
-
110
- elif visualization_option == "KDE Plot: Distance from Home by Attrition":
111
- # KDE plot for Distance from Home based on Attrition
112
- plt.figure(figsize=(10, 6))
113
- sns.kdeplot(data=filtered_df, x='DistanceFromHome', hue='Attrition', fill=True, palette='Set2')
114
- plt.xlabel('Distance From Home')
115
- plt.ylabel('Density')
116
- plt.title('KDE Plot of Distance From Home by Attrition')
117
- st.pyplot(plt)
118
-
119
- elif visualization_option == "Attrition by Job Role":
120
- # Bar chart for attrition by job role
121
- chart = alt.Chart(filtered_df).mark_bar().encode(
122
- y='JobRole',
123
- x='count()',
124
- color='Attrition'
125
- ).properties(
126
- title='Attrition by Job Role'
127
- )
128
- st.altair_chart(chart, use_container_width=True)
129
-
130
- elif visualization_option == "Attrition Distribution by Gender":
131
- # Pie chart for attrition distribution by gender
132
- pie_chart_data = filtered_df[filtered_df['Attrition'] == 'Yes']['Gender'].value_counts().reset_index()
133
- pie_chart_data.columns = ['Gender', 'count']
134
-
135
- chart = alt.Chart(pie_chart_data).mark_arc().encode(
136
- theta='count:Q',
137
- color='Gender:N',
138
- tooltip=['Gender', 'count']
139
- ).properties(
140
- title='Attrition Distribution by Gender',
141
- width=300,
142
- height=300
143
- ).project('identity')
144
- st.altair_chart(chart, use_container_width=True)
145
-
146
- elif visualization_option == "MonthlyRate and DailyRate by JobLevel":
147
- # Boxplots for MonthlyRate and DailyRate by JobLevel
148
- fig, ax = plt.subplots(1, 2, figsize=(15, 7))
149
-
150
- # MonthlyRate by JobLevel
151
- sns.boxplot(x="JobLevel", y="MonthlyRate", data=filtered_df, ax=ax[0], hue="JobLevel", palette='Set2', legend=False)
152
- ax[0].set_title('MonthlyRate by JobLevel')
153
- ax[0].set_xlabel('Job Level')
154
- ax[0].set_ylabel('Monthly Rate')
155
-
156
- # DailyRate by JobLevel
157
- sns.boxplot(x="JobLevel", y="DailyRate", data=filtered_df, ax=ax[1], hue="JobLevel", palette='Set2', legend=False)
158
- ax[1].set_title('DailyRate by JobLevel')
159
- ax[1].set_xlabel('Job Level')
160
- ax[1].set_ylabel('Daily Rate')
161
-
162
- plt.tight_layout()
163
- st.pyplot(fig)
164
-
165
- # Display dataset overview
166
- st.header("Dataset Overview")
167
- st.dataframe(df.describe())
168
-
169
-
170
- # Insights from Visualization Section Expander
171
- with st.expander("Insights from Visualization 🧠"):
172
- st.markdown("""
173
- 1. **Age Groups & Attrition** - The 'Attrition by Age Group' plot showcases which age brackets face higher attrition.
174
- 2. **Home Distance's Impact** - The 'KDE Plot: Distance from Home by Attrition' visualizes if being farther away influences leaving tendencies.
175
- 3. **Roles & Attrition** - 'Attrition by Job Role' reveals which roles might be more attrition-prone.
176
- 4. **Gender & Attrition** - The pie chart for 'Attrition Distribution by Gender' provides insights into any gender-based patterns.
177
- 5. **Earnings Patterns** - 'MonthlyRate and DailyRate by JobLevel' boxplots display the compensation distribution across job levels.
178
- """)
179
-
180
- # Recommendations Expander
181
- with st.expander("Recommendations for Action 🌟"):
182
- st.markdown("""
183
- - 🎁 **Incentive Programs:** Introduce incentives tailored for groups showing higher attrition tendencies.
184
- - 🏡 **Remote Work Options:** Providing flexibility, especially for those living farther from the workplace, could reduce attrition.
185
- - 🚀 **Training & Growth:** Invest in employee development, especially in roles with higher attrition rates.
186
- - 👫 **Gender Equality:** Foster an environment that supports equal opportunities regardless of gender.
187
- - 💸 **Compensation Review:** Regularly review and adjust compensation structures to stay competitive and retain talent.
188
- """)
 
4
  import altair as alt
5
  import matplotlib.pyplot as plt
6
  import seaborn as sns
7
+ from scipy.stats import zscore
8
 
9
+ st.title('my shitty app ')
 
 
 
 
 
 
 
10
 
11
+ file_path= 'kiva_loans.csv'
 
 
 
12
 
13
+ df_kiva_loans = pd.read_csv(file_path)
14
 
15
+ df_kiva_loans = df_kiva_loans.drop(['use', 'disbursed_time','funded_time','posted_time','tags'], axis=1)
 
16
 
17
+ # Drop NAs only in specific columns rather than in all of them; this doesn't affect the current task, but may be needed later.
18
+ df_kiva_loans.dropna(subset=['partner_id','borrower_genders'], inplace=True)
 
19
 
20
+ # Calculate Z-scores
21
+ z_scores = zscore(df_kiva_loans['funded_amount'])
22
 
23
+ # Get boolean array indicating the presence of outliers
24
+ df_kiva_loans['outlier_funded_amount'] = (z_scores > 3) | (z_scores < -3)
25
+ df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]
26
 
27
+ # Group by the variable selected in the sidebar and compute either the row count or the summed funded amount.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ selected = st.sidebar.selectbox("Select Variable of Interest", ['country', 'sector', 'repayment_interval'])
30
+ selected2 = st.sidebar.selectbox("Select Variable of Interest", ['funded_amount', 'count'])
31
 
32
+ if selected2 == 'count':
33
+ result = df_kiva_loans.groupby(selected).size().reset_index(name='count')
34
+ result = result.sort_values(by='count', ascending=False)
35
+ else:
36
+ result = df_kiva_loans.groupby(selected)[selected2].sum().reset_index()
37
+ result = result.sort_values(by=selected2, ascending=False)
38
 
39
+ st.title("Aggregated Data")
40
+ st.table(result)
 
 
 
 
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
kiva_loans.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b20efc20de600b27608d69fe07e728b00a075c3db29849e146b717098f778d92
3
+ size 195852823
kiva_mpi_region_locations.csv ADDED
The diff for this file is too large to render. See raw diff
 
loan_theme_ids.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48f3d922eef1d329ba913d0ec3c1b88714014f45ec5940a4084c311f4a455baa
3
+ size 31641314
loan_themes_by_region.csv ADDED
The diff for this file is too large to render. See raw diff