Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import joblib | |
import shap | |
import xgboost as xgb | |
# Load the saved model and preprocessing objects
@st.cache_resource
def _load_artifacts():
    """Load the fitted XGBoost model, scaler and one-hot encoder from disk.

    Streamlit re-executes this script on every widget interaction.
    ``st.cache_resource`` keeps the deserialized objects in memory so the
    joblib files are read once per server process instead of on every rerun.

    Returns:
        A ``(model, scaler, encoder)`` tuple, in that order.
    """
    # NOTE: joblib files are pickles -- only load artifacts from a trusted source.
    return (
        joblib.load('model_xgb.joblib'),
        joblib.load('scaler.joblib'),
        joblib.load('ohe.joblib'),
    )


model_xgb, scaler, ohe = _load_artifacts()

# Sector choices offered to the user: the categories the OneHotEncoder holds
# for its first encoded column ('sector').
unique_sectors = ohe.categories_[0]

# Define a mapping of encoded values to repayment interval labels
repayment_interval_mapping = {
    0: 'π Bullet Repayment Interval',
    1: 'πͺ Irregular Repayment Interval',
    2: 'π Monthly Repayment Interval',
}

# Title with emojis and colors
# NOTE(review): "Phillipines" is misspelled in the title below; the string is
# left unchanged here because it is user-facing runtime text.
st.markdown("<h1 style='text-align: center; color: blue;'>π Loan Repayment Interval Prediction for Phillipines(π΅π)π</h1>", unsafe_allow_html=True)

# Input Features Section with emojis and description
st.write("## π― Input Features")
st.markdown("Please select the variables to predict the repayment interval based on historical data. The model assumes that all loans used for training were successfully repaid. Kindly provide the following information:")

# User input fields; the sector options come from the OneHotEncoder object
sector = st.selectbox('π’ Sector', unique_sectors)
funded_amount = st.number_input('π° Funded Amount', min_value=0, max_value=20000, value=1000, step=50)
lender_count = st.number_input('π₯ Lender Count', min_value=1, max_value=200, value=2, step=1)

# Create a single-row observation from the user input
sample_listing = pd.DataFrame({
    'sector': [sector],
    'funded_amount': [funded_amount],
    'lender_count': [lender_count],
})

# Separate categorical and numerical features
cat_features = ['sector']
num_features = ['funded_amount', 'lender_count']

# Get the feature names from the OneHotEncoder
ohe_feature_names = ohe.get_feature_names_out(cat_features)

# Combine numerical feature names with encoded categorical feature names
# (same column order as X_processed below: numerical first, then one-hot).
feature_names = np.concatenate([num_features, ohe_feature_names])

# One-hot encode categorical features
encoded_cat = ohe.transform(sample_listing[cat_features])
if hasattr(encoded_cat, "toarray"):
    # An encoder configured for sparse output returns a SciPy sparse matrix,
    # which pd.DataFrame cannot lay out as columns; densify it first.
    encoded_cat = encoded_cat.toarray()
X_cat = pd.DataFrame(encoded_cat, columns=ohe_feature_names)

# Scale numerical features
X_num = pd.DataFrame(scaler.transform(sample_listing[num_features]), columns=num_features)

# Combine processed features
X_processed = pd.concat([X_num, X_cat], axis=1)

# Make a prediction (returns the encoded value)
predicted_encoded_repayment_interval = model_xgb.predict(X_processed)[0]

# Map the encoded value back to the actual repayment interval label
predicted_repayment_interval = repayment_interval_mapping.get(int(predicted_encoded_repayment_interval), "Unknown")

# Display the actual repayment interval label with more style
st.title("β Predicted Repayment Interval:")
st.markdown(f"<h2 style='color:green;'>{predicted_repayment_interval}</h2>", unsafe_allow_html=True)

# Explanation for SHAP force plots
st.write("## π SHAP Explanation")
st.markdown("The following SHAP plot explains the model's decision for the predicted repayment interval. This visualization helps you understand the key features that influenced the model's prediction. The red features push the result higher and the blue ones push it lower.")

# SHAP explanations for the single processed observation
explainer = shap.TreeExplainer(model_xgb)
shap_values = explainer.shap_values(X_processed)
# Function to add background color to SHAP plots
def add_background(html_content, background_color="white"):
    """Prefix an HTML snippet with a CSS rule that sets the page background.

    Args:
        html_content: HTML markup to be displayed.
        background_color: CSS color value used for ``body``; defaults to
            ``"white"``, which reproduces the previous hard-coded behavior.

    Returns:
        ``html_content`` with a ``<style>`` element prepended.
    """
    background_style = f"<style>body {{ background-color: {background_color}; }}</style>"
    return background_style + html_content
# Generate and display SHAP force plot based on the predicted repayment interval.
# Section headings keyed by the encoded class value predicted by the model.
SHAP_FORCE_PLOT_HEADINGS = {
    0: "### π SHAP Force Plot for Bullet Repayment Interval",
    1: "### πͺ SHAP Force Plot for Irregular Repayment Interval",
    2: "### π SHAP Force Plot for Monthly Repayment Interval",
}


def _class_shap_row(all_shap_values, class_index):
    """Return the per-feature SHAP values of the first sample for one class."""
    if isinstance(all_shap_values, list):
        # Older SHAP releases returned multiclass values as a list with one
        # (n_samples, n_features) array per class.
        return all_shap_values[class_index][0]
    # Array layout: (n_samples, n_features, n_classes).
    return all_shap_values[0][:, class_index]


def _render_force_plot_html(class_index):
    """Build the SHAP force plot for ``class_index`` and return it as HTML.

    The plot is serialized into an in-memory buffer rather than a fixed file
    name in the working directory, so concurrent sessions cannot overwrite
    each other's output and no writable directory is required.
    """
    import io

    force_plot = shap.force_plot(
        explainer.expected_value[class_index],
        _class_shap_row(shap_values, class_index),
        X_processed.iloc[0, :].values,
        feature_names,
        show=False,
        matplotlib=False,
    )
    html_buffer = io.StringIO()
    shap.save_html(html_buffer, force_plot)
    return html_buffer.getvalue()


predicted_class = int(predicted_encoded_repayment_interval)
force_plot_heading = SHAP_FORCE_PLOT_HEADINGS.get(predicted_class)
# Classes without a heading are skipped, as in the original if/elif chain.
if force_plot_heading is not None:
    st.write(force_plot_heading)
    st.components.v1.html(
        add_background(_render_force_plot_html(predicted_class)),
        height=130,
    )