Spaces:

Tryfonas
/

Module1_Final_Assignment

Sleeping

App Files Files Community

Module1_Final_Assignment / app.py

Tryfonas

Upload folder using huggingface_hub

dcb9a04 verified 3 months ago

raw

history blame contribute delete

5.45 kB

	import io

	import streamlit as st
	import pandas as pd
	import numpy as np
	import joblib
	import shap
	import xgboost as xgb

	# Streamlit re-executes this script on every widget interaction, so the
	# saved artifacts are loaded through a cached helper instead of being read
	# from disk on each rerun. When the installed Streamlit has no
	# `cache_resource`, fall back to a pass-through decorator (plain loading).
	_cache_resource = getattr(st, "cache_resource", lambda func: func)


	@_cache_resource
	def _load_artifacts():
	    """Load the saved model and preprocessing objects with joblib.

	    Returns:
	        Tuple ``(model, scaler, encoder)`` read from ``model_xgb.joblib``,
	        ``scaler.joblib`` and ``ohe.joblib``; the paths are relative, so
	        they resolve against the current working directory.
	    """
	    # NOTE(review): joblib.load unpickles arbitrary objects; these files
	    # must come from a trusted source.
	    return (
	        joblib.load('model_xgb.joblib'),
	        joblib.load('scaler.joblib'),
	        joblib.load('ohe.joblib'),
	    )


	model_xgb, scaler, ohe = _load_artifacts()

	# Sector options offered in the UI: the categories of the encoder's first
	# (and, as used in this script, only) categorical column
	unique_sectors = ohe.categories_[0]

	# Mapping from the model's encoded class to the label shown to the user
	repayment_interval_mapping = {0: '🚅 Bullet Repayment Interval', 1: '🪙 Irregular Repayment Interval', 2: '📅 Monthly Repayment Interval'}

	# Page title: centered blue <h1>, rendered as raw HTML
	st.markdown("<h1 style='text-align: center; color: blue;'>📊 Loan Repayment Interval Prediction for Philippines(🇵🇭)📈</h1>", unsafe_allow_html=True)

	# Input section heading and instructions for the user
	st.write("## 🎯 Input Features")
	st.markdown("Please select the variables to predict the repayment interval based on historical data. The model assumes that all loans used for training were successfully repaid. Kindly provide the following information:")

	# Input widgets. The sector options are the categories taken from the
	# OneHotEncoder; the two numeric inputs are bounded integer fields.
	sector = st.selectbox('🏢 Sector', unique_sectors)
	funded_amount = st.number_input('💰 Funded Amount', min_value=0, max_value=20000, value=1000, step=50)
	lender_count = st.number_input('👥 Lender Count', min_value=1, max_value=200, value=2, step=1)

	# Column groups: categorical inputs get one-hot encoded, numeric inputs get scaled
	cat_features = ['sector']
	num_features = ['funded_amount', 'lender_count']

	# Single-row frame holding the values chosen in the input widgets
	sample_listing = pd.DataFrame({
	    'sector': [sector],
	    'funded_amount': [funded_amount],
	    'lender_count': [lender_count],
	})

	# Names of the encoded columns, as reported by the encoder
	ohe_feature_names = ohe.get_feature_names_out(cat_features)

	# Numeric part: scaler output under the original numeric column names
	scaled_values = scaler.transform(sample_listing[num_features])
	X_num = pd.DataFrame(scaled_values, columns=num_features)

	# Categorical part: encoder output under the encoded column names
	encoded_values = ohe.transform(sample_listing[cat_features])
	X_cat = pd.DataFrame(encoded_values, columns=ohe_feature_names)

	# Model input: numeric columns first, then the one-hot columns
	X_processed = pd.concat([X_num, X_cat], axis=1)

	# Feature labels, in the same order as the columns of X_processed
	feature_names = np.concatenate([num_features, ohe_feature_names])

	# Run the model on the processed row and keep the first (only) encoded prediction
	predicted_encoded_repayment_interval = model_xgb.predict(X_processed)[0]

	# Translate the encoded class into its display label; unmapped classes show "Unknown"
	label_key = int(predicted_encoded_repayment_interval)
	predicted_repayment_interval = repayment_interval_mapping.get(label_key, "Unknown")

	# Show the result: a title line followed by the label as a green <h2>
	st.title("✅ Predicted Repayment Interval:")
	result_html = f"<h2 style='color:green;'>{predicted_repayment_interval}</h2>"
	st.markdown(result_html, unsafe_allow_html=True)

	# Section heading plus a short guide on how to read the force plot
	st.write("## 🔍 SHAP Explanation")
	shap_intro_text = (
	    "The following SHAP plot explains the model's decision for the predicted repayment interval. "
	    "This visualization helps you understand the key features that influenced the model's prediction. "
	    "The red features push the result higher and the blue ones push it lower."
	)
	st.markdown(shap_intro_text)

	# Build a tree explainer for the loaded model and compute SHAP values for the processed row
	explainer = shap.TreeExplainer(model_xgb)
	shap_values = explainer.shap_values(X_processed)

	# Function to add background color to SHAP plots
	def add_background(html_content: str) -> str:
	    """Return ``html_content`` prefixed with a white page-background style rule.

	    Args:
	        html_content: HTML markup to be embedded (here, a saved SHAP plot).

	    Returns:
	        A ``<style>`` element setting the body background to white,
	        followed by the unchanged ``html_content``.
	    """
	    # presumably keeps the plot legible when the host page uses a dark theme
	    white_background_style = "<style>body { background-color: white; }</style>"
	    return white_background_style + html_content

	# Generate and display the SHAP force plot for the predicted class only.
	# Headings are keyed by encoded class; a class without an entry draws no plot.
	force_plot_headings = {
	    0: "### 🚅 SHAP Force Plot for Bullet Repayment Interval",
	    1: "### 🪙 SHAP Force Plot for Irregular Repayment Interval",
	    2: "### 📅 SHAP Force Plot for Monthly Repayment Interval",
	}
	predicted_class = int(predicted_encoded_repayment_interval)
	force_plot_heading = force_plot_headings.get(predicted_class)

	if force_plot_heading is not None:
	    st.write(force_plot_heading)

	    # SHAP values of the first (only) row for the predicted class.
	    # NOTE(review): older SHAP releases are understood to return a list with
	    # one (rows, features) array per class, newer ones a single
	    # (rows, features, classes) array -- confirm against the installed version.
	    if isinstance(shap_values, list):
	        class_shap_values = shap_values[predicted_class][0]
	    else:
	        class_shap_values = shap_values[0][:, predicted_class]

	    force_plot = shap.force_plot(
	        explainer.expected_value[predicted_class],
	        class_shap_values,
	        X_processed.iloc[0, :].values,
	        feature_names,
	        show=False,
	        matplotlib=False,
	    )

	    # Render into memory rather than a fixed-name file in the working
	    # directory, which concurrent sessions would overwrite for each other.
	    html_buffer = io.StringIO()
	    shap.save_html(html_buffer, force_plot)
	    st.components.v1.html(add_background(html_buffer.getvalue()), height=130)