"""Streamlit app: predict a loan's repayment interval and explain it with SHAP.

The script loads a pre-trained XGBoost classifier together with the scaler and
one-hot encoder saved alongside it, collects three inputs from the user
(sector, funded amount, lender count), shows the predicted repayment interval
and renders a SHAP force plot for the predicted class.
"""

import io

import joblib
import numpy as np
import pandas as pd
import shap
import streamlit as st
import xgboost as xgb  # noqa: F401  (not referenced directly; kept from the original import list)

# Encoded model output -> label shown to the user.
repayment_interval_mapping = {
    0: '🚅 Bullet Repayment Interval',
    1: '🪙 Irregular Repayment Interval',
    2: '📅 Monthly Repayment Interval',
}

# Column groups of the input frame: categorical ones are one-hot encoded,
# numerical ones are scaled.
cat_features = ['sector']
num_features = ['funded_amount', 'lender_count']

# Streamlit re-executes this script on every widget interaction.
# `st.cache_resource` keeps the loaded artifacts alive between reruns; on older
# Streamlit versions that lack it, fall back to plain (uncached) loading.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_artifacts():
    """Load the model, the numeric scaler and the one-hot encoder from disk.

    Returns:
        Tuple ``(model_xgb, scaler, ohe)`` read from the ``*.joblib`` files in
        the current working directory.
    """
    return (
        joblib.load('model_xgb.joblib'),
        joblib.load('scaler.joblib'),
        joblib.load('ohe.joblib'),
    )


def add_background(html_content):
    """Return ``html_content`` prefixed with a white-background style rule.

    The embedded component is rendered in its own iframe, so the background has
    to be set inside the HTML that is passed to it.

    NOTE(review): in the source as received the style string was empty, which
    made this function a no-op; the rule below is reconstructed from the
    function and variable names - confirm it matches the intended styling.
    """
    white_background_style = "<style>body { background-color: white; }</style>"
    return white_background_style + html_content


def _to_dense(matrix):
    """Return ``matrix`` as a dense array (encoders may return SciPy sparse output)."""
    return matrix.toarray() if hasattr(matrix, "toarray") else matrix


def _shap_values_for_class(shap_values, class_index, row=0):
    """Pick the per-feature SHAP vector of one sample for one class.

    Handles both layouts ``TreeExplainer.shap_values`` can produce for a
    multiclass model: a list with one ``(n_samples, n_features)`` array per
    class, or a single ``(n_samples, n_features, n_classes)`` array.

    Args:
        shap_values: Output of ``explainer.shap_values(X)``.
        class_index: Index of the class to explain.
        row: Index of the sample to explain.

    Returns:
        1-D array with one SHAP value per feature.
    """
    if isinstance(shap_values, list):
        return np.asarray(shap_values[class_index])[row]
    values = np.asarray(shap_values)
    if values.ndim == 3:
        return values[row, :, class_index]
    # Single-output layout: there is no class axis to select from.
    return values[row]


def _force_plot_html(plot):
    """Serialise a SHAP force plot to a standalone HTML string.

    Writes into an in-memory buffer instead of a fixed-name file so that
    concurrent sessions cannot overwrite each other's output and no write
    access to the working directory is needed.
    """
    buffer = io.StringIO()
    shap.save_html(buffer, plot)
    return buffer.getvalue()


# Load the saved model and preprocessing objects
model_xgb, scaler, ohe = _load_artifacts()

# Extract the unique values for 'sector' from the OneHotEncoder (ohe) object
unique_sectors = ohe.categories_[0]

# Title
# NOTE(review): the inline HTML that originally wrapped this title is missing
# from the source as received; only the surviving text is reproduced here.
st.markdown(
    "\n\n📊 Loan Repayment Interval Prediction for Philippines(🇵🇭)📈\n\n",
    unsafe_allow_html=True,
)

# Input Features Section with emojis and description
st.write("## 🎯 Input Features")
st.markdown(
    "Please select the variables to predict the repayment interval based on "
    "historical data. The model assumes that all loans used for training were "
    "successfully repaid. Kindly provide the following information:"
)

# User input fields; the sector choices come from the fitted OneHotEncoder
sector = st.selectbox('🏢 Sector', unique_sectors)
funded_amount = st.number_input(
    '💰 Funded Amount', min_value=0, max_value=20000, value=1000, step=50
)
lender_count = st.number_input(
    '👥 Lender Count', min_value=1, max_value=200, value=2, step=1
)

# Create a single-row observation from the user input
sample_listing = pd.DataFrame({
    'sector': [sector],
    'funded_amount': [funded_amount],
    'lender_count': [lender_count],
})

# Feature names in the order the processed frame is assembled below:
# numerical columns first, then the one-hot encoded categorical columns.
ohe_feature_names = ohe.get_feature_names_out(cat_features)
feature_names = np.concatenate([num_features, ohe_feature_names])

# One-hot encode categorical features (densified so pandas can build the frame)
X_cat = pd.DataFrame(
    _to_dense(ohe.transform(sample_listing[cat_features])),
    columns=ohe_feature_names,
)

# Scale numerical features
X_num = pd.DataFrame(
    scaler.transform(sample_listing[num_features]),
    columns=num_features,
)

# Combine processed features
X_processed = pd.concat([X_num, X_cat], axis=1)

# Make a prediction (returns the encoded value) and map it back to its label
predicted_encoded_repayment_interval = model_xgb.predict(X_processed)[0]
predicted_class = int(predicted_encoded_repayment_interval)
predicted_repayment_interval = repayment_interval_mapping.get(predicted_class, "Unknown")

# Display the predicted repayment interval label
# NOTE(review): as with the title, the original wrapping markup is missing.
st.title("✅ Predicted Repayment Interval:")
st.markdown(f"\n\n{predicted_repayment_interval}\n\n", unsafe_allow_html=True)

# Explanation for SHAP force plots
st.write("## 🔍 SHAP Explanation")
st.markdown(
    "The following SHAP plot explains the model's decision for the predicted "
    "repayment interval. This visualization helps you understand the key "
    "features that influenced the model's prediction. The red features push "
    "the result higher and the blue ones push it lower."
)

# SHAP explanations
explainer = shap.TreeExplainer(model_xgb)
shap_values = explainer.shap_values(X_processed)

# Generate and display the SHAP force plot for the predicted class only.
# Nothing is rendered for a class that has no entry in the label mapping.
if predicted_class in repayment_interval_mapping:
    # Labels have the form "<emoji> <name>"; the heading puts the emoji first.
    emoji, _, interval_name = predicted_repayment_interval.partition(" ")
    st.write(f"### {emoji} SHAP Force Plot for {interval_name}")

    force_plot = shap.force_plot(
        explainer.expected_value[predicted_class],
        _shap_values_for_class(shap_values, predicted_class),
        X_processed.iloc[0, :].values,
        feature_names,
        show=False,
        matplotlib=False,
    )
    st.components.v1.html(add_background(_force_plot_html(force_plot)), height=130)