Tryfonas committed on
Commit
d284e66
·
verified ·
1 Parent(s): a6c0b25

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ kiva_loans.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import shap
6
+ import xgboost as xgb
7
+
# Load the saved model and preprocessing objects.
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the artifacts are loaded through a cached helper instead of
# being read from disk again on each rerun.
@st.cache_resource
def _load_artifacts():
    """Return the (model, scaler, one-hot encoder) triple loaded from disk.

    NOTE(review): joblib.load unpickles arbitrary Python objects, so these
    files must only ever come from a trusted training run.
    """
    return (
        joblib.load('model_xgb.joblib'),
        joblib.load('scaler.joblib'),
        joblib.load('ohe.joblib'),
    )


model_xgb, scaler, ohe = _load_artifacts()

# Extract the unique values for 'country' and 'sector' from the OneHotEncoder (ohe) object
unique_countries = ohe.categories_[0]  # Assuming 'country' is the first categorical feature
unique_sectors = ohe.categories_[1]  # Assuming 'sector' is the second categorical feature

# Map the model's encoded class ids to the labels shown to the user.
repayment_interval_mapping = {
    0: '🚅 Bullet Repayment Interval',
    1: '🪙 Irregular Repayment Interval',
    2: '📅 Monthly Repayment Interval',
}
19
+
# Page title (centered, blue) rendered as raw HTML.
st.markdown(
    "<h1 style='text-align: center; color: blue;'>📊 Loan Repayment Interval Prediction 📈</h1>",
    unsafe_allow_html=True,
)

# Input Features section with a short description for the user.
st.write("## 🎯 Input Features")
st.markdown("Here you can choose the variables to predict the repayment interval based on historical data. Please provide the following details:")

# User input fields; the country and sector options come from the fitted
# OneHotEncoder categories, so only values the encoder knows can be selected.
country = st.selectbox('🌍 Country', unique_countries)
sector = st.selectbox('🏢 Sector', unique_sectors)
funded_amount = st.number_input('💰 Funded Amount', min_value=0, max_value=10000, value=1000, step=50)
lender_count = st.number_input('👥 Lender Count', min_value=1, max_value=100, value=2, step=1)

# Build a single-row frame from the user input; this is the observation that
# gets preprocessed and scored further down the script.
sample_listing = pd.DataFrame({
    'country': [country],
    'sector': [sector],
    'funded_amount': [funded_amount],
    'lender_count': [lender_count],
})
40
+
# Separate categorical and numerical features
cat_features = ['country', 'sector']
num_features = ['funded_amount', 'lender_count']

# Names of the one-hot columns produced by the encoder for the categorical features.
ohe_feature_names = ohe.get_feature_names_out(cat_features)

# Full feature-name vector: numerical names first, then the one-hot names.
# The order matches the column order of X_processed below.
feature_names = np.concatenate([num_features, ohe_feature_names])

# One-hot encode categorical features. An encoder left at its default
# configuration returns a SciPy sparse matrix, which pandas cannot turn into a
# labelled DataFrame directly, so densify when that is what comes back.
encoded_cat = ohe.transform(sample_listing[cat_features])
if hasattr(encoded_cat, "toarray"):
    encoded_cat = encoded_cat.toarray()
X_cat = pd.DataFrame(encoded_cat, columns=ohe_feature_names)

# Scale numerical features
X_num = pd.DataFrame(scaler.transform(sample_listing[num_features]), columns=num_features)

# Combine processed features (numerical columns first, then one-hot columns)
X_processed = pd.concat([X_num, X_cat], axis=1)

# Make a prediction (returns the encoded class id for the single input row)
predicted_encoded_repayment_interval = model_xgb.predict(X_processed)[0]

# Map the encoded value back to its display label; ids missing from the
# mapping are shown as "Unknown" rather than raising.
predicted_repayment_interval = repayment_interval_mapping.get(int(predicted_encoded_repayment_interval), "Unknown")

# Display the predicted repayment interval label
st.title("✅ Predicted Repayment Interval:")
st.markdown(f"<h2 style='color:green;'>{predicted_repayment_interval}</h2>", unsafe_allow_html=True)
69
+
# Introduce the SHAP force plots that follow.
st.write("## 🔍 SHAP Explanation")
st.markdown("The following SHAP plots explain the model's decision for each repayment interval type. These visualizations help you understand the key features that influenced the model's prediction.")


# Streamlit reruns the whole script on every interaction; building the tree
# explainer is independent of the user input, so build it once and reuse it.
@st.cache_resource
def _build_explainer(_model):
    """Return a SHAP TreeExplainer for the given model.

    The leading underscore on the parameter tells Streamlit not to hash the
    model when computing the cache key.
    """
    return shap.TreeExplainer(_model)


# SHAP explanations for the current (single-row) processed observation.
explainer = _build_explainer(model_xgb)
shap_values = explainer.shap_values(X_processed)
77
+
# Function to add background color to SHAP plots
def add_background(html_content, background_color="white"):
    """Prefix an HTML fragment with a rule that sets the page background.

    Args:
        html_content: HTML text to embed.
        background_color: CSS colour applied to ``body``. Defaults to
            ``"white"``, the value this script has always used.

    Returns:
        A ``<style>`` element followed by ``html_content``.
    """
    background_style = f"<style>body {{ background-color: {background_color}; }}</style>"
    return background_style + html_content
82
+
# One entry per repayment-interval class: (class index, section heading, description).
_FORCE_PLOT_SECTIONS = (
    (
        0,
        "### 🚅 SHAP Force Plot for Class 0: Bullet Repayment Interval",
        "This plot explains the factors influencing the Bullet repayment interval prediction. Bullet repayment means paying off the loan in one lump sum at the end of the loan period.",
    ),
    (
        1,
        "### 🪙 SHAP Force Plot for Class 1: Irregular Repayment Interval",
        "This plot explains the factors influencing the Irregular repayment interval prediction. Irregular repayment means paying off the loan at irregular intervals based on specific conditions.",
    ),
    (
        2,
        "### 📅 SHAP Force Plot for Class 2: Monthly Repayment Interval",
        "This plot explains the factors influencing the Monthly repayment interval prediction. Monthly repayment means paying off the loan in equal monthly installments.",
    ),
)


def _class_shap_values(all_values, class_index):
    """Return the first sample's per-feature SHAP values for one class."""
    if isinstance(all_values, list):
        # Older shap releases return one (samples x features) array per class.
        return all_values[class_index][0]
    # Otherwise a single array indexed as [sample][feature, class], which is
    # the layout the original indexing relied on.
    return all_values[0][:, class_index]


def _render_force_plot(class_index, heading, description):
    """Write one section: heading, description and the embedded force plot.

    The plot page is serialised into an in-memory buffer rather than a
    fixed-name file in the working directory. That file was shared by every
    session and required a writable disk.
    """
    import io  # local import: only this helper needs it

    st.write(heading)
    st.markdown(description)

    plot = shap.force_plot(
        explainer.expected_value[class_index],
        _class_shap_values(shap_values, class_index),
        X_processed.iloc[0, :].values,
        feature_names,
        show=False,
        matplotlib=False,
    )

    # shap.save_html accepts an open text file object as well as a path.
    buffer = io.StringIO()
    shap.save_html(buffer, plot)
    st.components.v1.html(add_background(buffer.getvalue()), height=130)


# Render the three force plots in class order (Bullet, Irregular, Monthly).
for _class_index, _heading, _description in _FORCE_PLOT_SECTIONS:
    _render_force_plot(_class_index, _heading, _description)
kiva_loans.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b20efc20de600b27608d69fe07e728b00a075c3db29849e146b717098f778d92
3
+ size 195852823
model_xgb.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98dd28b386001fc78e8261ec56685bd758c88b9ec0c5bedb73554c5b3953cf45
3
+ size 1648037
ohe.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a42f3b58afdd17d47b9a67203087a2619a51c65ad916d9b44eb172c078c0a6a
3
+ size 2809
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+ streamlit
3
+ pandas
4
+ numpy
5
+ xgboost
6
+ scikit-learn
7
+ shap
8
+ matplotlib
9
+ joblib
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82b8e1f41898b2d08758feeed3e15d37e15d577a0f5a701d931a611c39774546
3
+ size 999
shap_force_plot_class_0.html ADDED
The diff for this file is too large to render. See raw diff
 
shap_force_plot_class_1.html ADDED
The diff for this file is too large to render. See raw diff
 
shap_force_plot_class_2.html ADDED
The diff for this file is too large to render. See raw diff