Tryfonas committed on
Commit
e1629ce
·
verified ·
1 Parent(s): be26401

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +29 -18
app.py CHANGED
@@ -41,7 +41,7 @@ if page == "Introduction":
41
  This application provides insights into Kiva loans data.
42
  You can explore the distribution of funded amounts,
43
  analyze top values by selected variables, and visualize
44
- relationships between funded amounts and various factors.
45
  """)
46
 
47
  # Data Overview Page
@@ -61,7 +61,7 @@ elif page == "Data Overview":
61
  title='Distribution of Funded Amounts'
62
  )
63
  st.altair_chart(chart, use_container_width=True)
64
- st.write("This chart shows the distribution of funded amounts for Kiva loans. The x-axis represents the funded amount, while the y-axis shows the count of loans that fall within each bin.")
65
 
66
  # Page 3: Top Values by Selected Variable
67
  elif page == "Top Values by Selected Variable":
@@ -72,7 +72,7 @@ elif page == "Top Values by Selected Variable":
72
 
73
  # Slider to select the number of top values to display
74
  num_columns = st.slider(
75
- "Select Number of Columns to Display",
76
  min_value=5,
77
  max_value=50,
78
  value=10, # default value
@@ -84,17 +84,17 @@ elif page == "Top Values by Selected Variable":
84
  top_values = df_kiva_loans_cleaned.groupby('country')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
85
  x_column = 'country'
86
  count_column = 'count'
87
- description = f"This chart displays the top {num_columns} countries by total funded amount. The blue bars represent the total funded amount, while the red line indicates the count of loans."
88
  elif plot_type == 'repayment_interval':
89
  top_values = df_kiva_loans_cleaned.groupby('repayment_interval')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
90
  x_column = 'repayment_interval'
91
  count_column = 'count'
92
- description = f"This chart shows the top {num_columns} repayment intervals by total funded amount. The blue bars represent the total funded amount, while the red line indicates the count of loans."
93
  else: # sector
94
  top_values = df_kiva_loans_cleaned.groupby('sector')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
95
  x_column = 'sector'
96
  count_column = 'count'
97
- description = f"This chart illustrates the top {num_columns} sectors by total funded amount. The blue bars represent the total funded amount, while the red line indicates the count of loans."
98
 
99
  # Display description
100
  st.write(description)
@@ -140,17 +140,19 @@ elif page == "Top Values by Selected Variable":
140
  if plot_type != 'repayment_interval':
141
  top_values_sorted = df_kiva_loans_cleaned.groupby(plot_type)['funded_amount'].agg('sum').nlargest(num_columns).index
142
  sns.boxplot(x=plot_type, y='funded_amount', data=filtered_df_boxplot, order=top_values_sorted, ax=ax)
 
143
  else:
144
  sns.boxplot(x=plot_type, y='funded_amount', data=filtered_df_boxplot, ax=ax)
 
145
 
146
  plt.title('Funded Amount by Selected Variable')
147
  plt.xlabel(plot_type)
148
  plt.ylabel('Funded Amount')
149
- plt.xticks(rotation=45)
150
  st.pyplot(fig)
151
 
152
  # Display description for boxplot
153
- st.write(f"This boxplot shows the distribution of funded amounts for the top {num_columns} {plot_type.replace('_', ' ')}. It provides insights into the spread and outliers of funded amounts.")
154
 
155
  # Page 4: Other Plots
156
  elif page == "Repayment Interval by Selected Variable":
@@ -172,11 +174,11 @@ elif page == "Repayment Interval by Selected Variable":
172
  if plot_var == 'sector':
173
  top_values_plot = df_kiva_loans_cleaned.groupby('sector')['funded_amount'].agg('count').nlargest(num_top_values).index
174
  filtered_df_plot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['sector'].isin(top_values_plot)]
175
- description = f"This countplot shows the distribution of repayment intervals for the top {num_top_values} sectors based on the number of loans."
176
  elif plot_var == 'country':
177
  top_values_plot = df_kiva_loans_cleaned.groupby('country')['funded_amount'].agg('count').nlargest(num_top_values).index
178
  filtered_df_plot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['country'].isin(top_values_plot)]
179
- description = f"This countplot illustrates the distribution of repayment intervals for the top {num_top_values} countries based on the number of loans."
180
 
181
  # Display description
182
  st.write(description)
@@ -211,10 +213,10 @@ elif page == "Country Comparison Deepdive":
211
  st.subheader("Country Comparison Deepdive")
212
 
213
  # Multi-select for countries
214
- selected_countries = st.multiselect("Select Countries to Compare", options=df_kiva_loans_cleaned['country'].unique())
215
 
216
  # Option to choose between count or sum of funded amounts
217
- aggregation_option = st.radio("Select Aggregation Type:", ("Count", "Sum"))
218
 
219
  if selected_countries:
220
  # Filter the data based on selected countries
@@ -226,10 +228,12 @@ elif page == "Country Comparison Deepdive":
226
  sector_summary = filtered_data.groupby(['country', 'sector']).agg(
227
  total_funded_amount=('funded_amount', 'sum')
228
  ).reset_index()
 
229
  else: # Count
230
  sector_summary = filtered_data.groupby(['country', 'sector']).agg(
231
  total_funded_amount=('funded_amount', 'count')
232
  ).reset_index()
 
233
 
234
  fig, ax = plt.subplots(figsize=(12, 6))
235
  sns.barplot(x='sector', y='total_funded_amount', hue='country', data=sector_summary, ax=ax)
@@ -241,14 +245,16 @@ elif page == "Country Comparison Deepdive":
241
 
242
  # Create a combined bar plot for repayment summary
243
  st.subheader("Total Funded Amounts by Repayment Interval for Selected Countries")
244
- if aggregation_option == "Sum":
245
  repayment_summary = filtered_data.groupby(['country', 'repayment_interval']).agg(
246
  total_funded_amount=('funded_amount', 'sum')
247
  ).reset_index()
 
248
  else: # Count
249
  repayment_summary = filtered_data.groupby(['country', 'repayment_interval']).agg(
250
  total_funded_amount=('funded_amount', 'count')
251
  ).reset_index()
 
252
 
253
  fig, ax = plt.subplots(figsize=(12, 6))
254
  sns.barplot(x='repayment_interval', y='total_funded_amount', hue='country', data=repayment_summary, ax=ax)
@@ -258,17 +264,17 @@ elif page == "Country Comparison Deepdive":
258
  plt.xticks(rotation=45)
259
  st.pyplot(fig)
260
  else:
261
- st.write("Please select one or more countries to compare.")
262
 
263
  # Page 6: Sector Comparison
264
  elif page == "Sector Comparison Deepdive":
265
  st.subheader("Sector Comparison Deepdive")
266
 
267
  # Multi-select for sectors
268
- selected_sectors = st.multiselect("Select Sectors to Compare", options=df_kiva_loans_cleaned['sector'].unique())
269
 
270
  # Option to choose between count or sum of funded amounts
271
- aggregation_option = st.radio("Select Aggregation Type:", ("Count", "Sum"))
272
 
273
  if selected_sectors:
274
  # Filter the data based on selected sectors
@@ -276,14 +282,16 @@ elif page == "Sector Comparison Deepdive":
276
 
277
  # Create a combined bar plot for sector summary by country
278
  st.subheader("Total Funded Amounts by Country for Selected Sectors")
279
- if aggregation_option == "Sum":
280
  country_summary = filtered_data.groupby(['country', 'sector']).agg(
281
  total_funded_amount=('funded_amount', 'sum')
282
  ).reset_index()
 
283
  else: # Count
284
  country_summary = filtered_data.groupby(['country', 'sector']).agg(
285
  total_funded_amount=('funded_amount', 'count')
286
  ).reset_index()
 
287
 
288
  fig, ax = plt.subplots(figsize=(12, 6))
289
  sns.barplot(x='country', y='total_funded_amount', hue='sector', data=country_summary, ax=ax)
@@ -300,10 +308,12 @@ elif page == "Sector Comparison Deepdive":
300
  repayment_summary = filtered_data.groupby(['repayment_interval', 'sector']).agg(
301
  total_funded_amount=('funded_amount', 'sum')
302
  ).reset_index()
 
303
  else: # Count
304
  repayment_summary = filtered_data.groupby(['repayment_interval', 'sector']).agg(
305
  total_funded_amount=('funded_amount', 'count')
306
  ).reset_index()
 
307
 
308
  fig, ax = plt.subplots(figsize=(12, 6))
309
  sns.barplot(x='repayment_interval', y='total_funded_amount', hue='sector', data=repayment_summary, ax=ax)
@@ -314,4 +324,5 @@ elif page == "Sector Comparison Deepdive":
314
  plt.xticks(rotation=90)
315
  st.pyplot(fig)
316
  else:
317
- st.write("Please select one or more sectors to compare.")
 
 
41
  This application provides insights into Kiva loans data.
42
  You can explore the distribution of funded amounts,
43
  analyze top values by selected variables, and visualize
44
+ relationships between funded amounts and various factors, such as the countries and sectors in which the loans were funded.
45
  """)
46
 
47
  # Data Overview Page
 
61
  title='Distribution of Funded Amounts'
62
  )
63
  st.altair_chart(chart, use_container_width=True)
64
+ st.write("This chart shows the distribution of funded amounts for Kiva loans. The x-axis represents the funded amount, while the y-axis shows the count of loans that fall within each bin. As you can see, most loans are low-valued, with the majority falling between 100 and 500.")
65
 
66
  # Page 3: Top Values by Selected Variable
67
  elif page == "Top Values by Selected Variable":
 
72
 
73
  # Slider to select the number of top values to display
74
  num_columns = st.slider(
75
+ "Select Number of Columns to Display on the Chart",
76
  min_value=5,
77
  max_value=50,
78
  value=10, # default value
 
84
  top_values = df_kiva_loans_cleaned.groupby('country')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
85
  x_column = 'country'
86
  count_column = 'count'
87
+ description = f"This chart displays the top {num_columns} countries by total funded amount. The blue bars represent the total funded amount, while the red line indicates the count of loans. In general, the Philippines is the country with the most loans, followed by Kenya and El Salvador."
88
  elif plot_type == 'repayment_interval':
89
  top_values = df_kiva_loans_cleaned.groupby('repayment_interval')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
90
  x_column = 'repayment_interval'
91
  count_column = 'count'
92
+ description = f"This chart shows the top {num_columns} repayment intervals by total funded amount. The blue bars represent the total funded amount, while the red line indicates the count of loans. Most of the loans are funded with a monthly repayment interval, whereas bullet repayment is an unusual choice."
93
  else: # sector
94
  top_values = df_kiva_loans_cleaned.groupby('sector')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
95
  x_column = 'sector'
96
  count_column = 'count'
97
+ description = f"This chart illustrates the top {num_columns} sectors by total funded amount. The blue bars represent the total funded amount, while the red line indicates the count of loans. Most loans are funded to the Agriculture sector, with Food and Retail completing the top three. It looks like loans are funded more readily when the business's sector is close to primary production or basic necessities (food)."
98
 
99
  # Display description
100
  st.write(description)
 
140
  if plot_type != 'repayment_interval':
141
  top_values_sorted = df_kiva_loans_cleaned.groupby(plot_type)['funded_amount'].agg('sum').nlargest(num_columns).index
142
  sns.boxplot(x=plot_type, y='funded_amount', data=filtered_df_boxplot, order=top_values_sorted, ax=ax)
143
+ st.write(f"This boxplot shows the distribution of funded amounts for the top {num_columns} {plot_type.replace('_', ' ')}. It provides insights into the spread and outliers of funded amounts.")
144
  else:
145
  sns.boxplot(x=plot_type, y='funded_amount', data=filtered_df_boxplot, ax=ax)
146
+ st.write(f"This boxplot shows the distribution of funded amounts for the top {num_columns} {plot_type.replace('_', ' ')}. It provides insights into the spread and outliers of funded amounts.")
147
 
148
  plt.title('Funded Amount by Selected Variable')
149
  plt.xlabel(plot_type)
150
  plt.ylabel('Funded Amount')
151
+ plt.xticks(rotation=90)
152
  st.pyplot(fig)
153
 
154
  # Display description for boxplot
155
+
156
 
157
  # Page 4: Other Plots
158
  elif page == "Repayment Interval by Selected Variable":
 
174
  if plot_var == 'sector':
175
  top_values_plot = df_kiva_loans_cleaned.groupby('sector')['funded_amount'].agg('count').nlargest(num_top_values).index
176
  filtered_df_plot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['sector'].isin(top_values_plot)]
177
+ description = f"This countplot shows the distribution of repayment intervals for the top {num_top_values} sectors based on the number of loans. In terms of sectors, Agriculture received the most monthly repayment loans, followed by Food. A lot of irregular loans were also in the Food, Retail and Agriculture sectors, which again confirms that loans for basic necessities are given more easily."
178
  elif plot_var == 'country':
179
  top_values_plot = df_kiva_loans_cleaned.groupby('country')['funded_amount'].agg('count').nlargest(num_top_values).index
180
  filtered_df_plot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['country'].isin(top_values_plot)]
181
+ description = f"This countplot illustrates the distribution of repayment intervals for the top {num_top_values} countries based on the number of loans. In terms of countries, the Philippines had a great number of irregular loans."
182
 
183
  # Display description
184
  st.write(description)
 
213
  st.subheader("Country Comparison Deepdive")
214
 
215
  # Multi-select for countries
216
+ selected_countries = st.multiselect("Select Countries to Compare (Please select one or more)", options=df_kiva_loans_cleaned['country'].unique())
217
 
218
  # Option to choose between count or sum of funded amounts
219
+ aggregation_option = st.radio("Select Aggregation Type:", ("Count of Loans", "Summary of Funded Amount"))
220
 
221
  if selected_countries:
222
  # Filter the data based on selected countries
 
228
  sector_summary = filtered_data.groupby(['country', 'sector']).agg(
229
  total_funded_amount=('funded_amount', 'sum')
230
  ).reset_index()
231
+ st.write("This graph shows the total funded amount in each sector for the countries selected by the user.")
232
  else: # Count
233
  sector_summary = filtered_data.groupby(['country', 'sector']).agg(
234
  total_funded_amount=('funded_amount', 'count')
235
  ).reset_index()
236
+ st.write("This graph shows the number of loans in each sector for the countries selected by the user.")
237
 
238
  fig, ax = plt.subplots(figsize=(12, 6))
239
  sns.barplot(x='sector', y='total_funded_amount', hue='country', data=sector_summary, ax=ax)
 
245
 
246
  # Create a combined bar plot for repayment summary
247
  st.subheader("Total Funded Amounts by Repayment Interval for Selected Countries")
248
+ if aggregation_option == "Summary of Funded Amount":
249
  repayment_summary = filtered_data.groupby(['country', 'repayment_interval']).agg(
250
  total_funded_amount=('funded_amount', 'sum')
251
  ).reset_index()
252
+ st.write("This graph shows the total funded amount in each repayment interval for the countries selected by the user.")
253
  else: # Count
254
  repayment_summary = filtered_data.groupby(['country', 'repayment_interval']).agg(
255
  total_funded_amount=('funded_amount', 'count')
256
  ).reset_index()
257
+ st.write("This graph shows the number of loans in each repayment interval for the countries selected by the user.")
258
 
259
  fig, ax = plt.subplots(figsize=(12, 6))
260
  sns.barplot(x='repayment_interval', y='total_funded_amount', hue='country', data=repayment_summary, ax=ax)
 
264
  plt.xticks(rotation=45)
265
  st.pyplot(fig)
266
  else:
267
+ st.write("Please select one or more countries to compare from the dropdown above.")
268
 
269
  # Page 6: Sector Comparison
270
  elif page == "Sector Comparison Deepdive":
271
  st.subheader("Sector Comparison Deepdive")
272
 
273
  # Multi-select for sectors
274
+ selected_sectors = st.multiselect("Select Sectors to Compare (Please select one or more)", options=df_kiva_loans_cleaned['sector'].unique())
275
 
276
  # Option to choose between count or sum of funded amounts
277
+ aggregation_option = st.radio("Select Aggregation Type:", ("Count of Loans", "Summary of Funded Amount"))
278
 
279
  if selected_sectors:
280
  # Filter the data based on selected sectors
 
282
 
283
  # Create a combined bar plot for sector summary by country
284
  st.subheader("Total Funded Amounts by Country for Selected Sectors")
285
+ if aggregation_option == "Summary of Funded Amount":
286
  country_summary = filtered_data.groupby(['country', 'sector']).agg(
287
  total_funded_amount=('funded_amount', 'sum')
288
  ).reset_index()
289
+ st.write("This graph shows the total funded amount in each country for the sectors selected by the user.")
290
  else: # Count
291
  country_summary = filtered_data.groupby(['country', 'sector']).agg(
292
  total_funded_amount=('funded_amount', 'count')
293
  ).reset_index()
294
+ st.write("This graph shows the number of loans in each country for the sectors selected by the user.")
295
 
296
  fig, ax = plt.subplots(figsize=(12, 6))
297
  sns.barplot(x='country', y='total_funded_amount', hue='sector', data=country_summary, ax=ax)
 
308
  repayment_summary = filtered_data.groupby(['repayment_interval', 'sector']).agg(
309
  total_funded_amount=('funded_amount', 'sum')
310
  ).reset_index()
311
+ st.write("This graph shows the total funded amount in each repayment interval for the sectors selected by the user.")
312
  else: # Count
313
  repayment_summary = filtered_data.groupby(['repayment_interval', 'sector']).agg(
314
  total_funded_amount=('funded_amount', 'count')
315
  ).reset_index()
316
+ st.write("This graph shows the number of loans in each repayment interval for the sectors selected by the user.")
317
 
318
  fig, ax = plt.subplots(figsize=(12, 6))
319
  sns.barplot(x='repayment_interval', y='total_funded_amount', hue='sector', data=repayment_summary, ax=ax)
 
324
  plt.xticks(rotation=90)
325
  st.pyplot(fig)
326
  else:
327
+ st.write("Please select one or more sectors to compare from the dropdown above.")
328
+