# haizad: add label to plot (e4f91e7)
import gradio as gr
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.utils import shuffle
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from skops.hub_utils import download
import joblib
import shutil
# load dataset
def load_ames_housing():
    """Fetch the OpenML ``house_prices`` data and return a 600-row subset.

    The feature frame is restricted to a fixed list of 20 columns, shuffled
    together with the target using ``random_state=0``, and then truncated to
    the first 600 rows.

    Returns:
        A ``(X, y)`` pair: the selected feature columns and the natural
        logarithm of the corresponding target values.
    """
    selected_columns = [
        "YrSold",
        "HeatingQC",
        "Street",
        "YearRemodAdd",
        "Heating",
        "MasVnrType",
        "BsmtUnfSF",
        "Foundation",
        "MasVnrArea",
        "MSSubClass",
        "ExterQual",
        "Condition2",
        "GarageCars",
        "GarageType",
        "OverallQual",
        "TotalBsmtSF",
        "BsmtFinSF1",
        "HouseStyle",
        "MiscFeature",
        "MoSold",
    ]
    n_rows = 600

    dataset = fetch_openml(name="house_prices", as_frame=True, parser="pandas")
    # Shuffle features and target together so rows stay aligned.
    features_frame, target = shuffle(
        dataset.data.loc[:, selected_columns],
        dataset.target,
        random_state=0,
    )
    # The target is returned on a log scale.
    return features_frame.iloc[:n_rows], np.log(target.iloc[:n_rows])
def _load_estimators(repo_ids):
    """Download each Hub repo and return uniquely named ``(name, estimator)`` pairs."""
    import os
    import tempfile

    estimators = []
    used_names = set()
    for raw_id in repo_ids:
        repo_id = raw_id.strip()
        if not repo_id:
            raise ValueError("Model repo id must not be empty")

        # A fresh scratch directory per model: concurrent requests cannot
        # collide on a shared path, and the files are removed even when the
        # download or the load raises.
        with tempfile.TemporaryDirectory() as scratch:
            dst = os.path.join(scratch, "model")  # must not exist yet
            download(repo_id=repo_id, dst=dst)
            # SECURITY: joblib.load unpickles the file, which can execute
            # arbitrary code. The repo id comes from a user-editable text
            # box, so only run this app where that risk is acceptable.
            pipeline = joblib.load(os.path.join(dst, "model.pkl"))

        # StackingRegressor requires unique estimator names; two repos can
        # share a basename (or the same repo can be entered twice).
        base_name = repo_id.split("/")[-1]
        name = base_name
        suffix = 2
        while name in used_names:
            name = f"{base_name}_{suffix}"
            suffix += 1
        used_names.add(name)
        estimators.append((name, pipeline))
    return estimators


def stacked_model(model1, model2, model3):
    """Stack three Hub-hosted regressors and plot them against the stack.

    Each repo id is downloaded and its ``model.pkl`` loaded. The three
    estimators are combined in a ``StackingRegressor`` with ``RidgeCV`` as
    final estimator. Every single estimator and the stacked one are then
    cross-validated on the data from ``load_ames_housing`` and drawn as an
    actual-vs-predicted panel annotated with R2 and MAE.

    Args:
        model1: Repo id of the first model.
        model2: Repo id of the second model.
        model3: Repo id of the third model.

    Returns:
        A matplotlib ``Figure`` with a 2x2 grid of panels.

    Raises:
        ValueError: If a repo id is empty or only whitespace.
    """
    import time

    from matplotlib.figure import Figure
    from sklearn.metrics import PredictionErrorDisplay
    from sklearn.model_selection import cross_validate, cross_val_predict

    X, y = load_ames_housing()
    estimators = _load_estimators([model1, model2, model3])
    stacking_regressor = StackingRegressor(
        estimators=estimators, final_estimator=RidgeCV()
    )

    scorers = {"R2": "r2", "MAE": "neg_mean_absolute_error"}

    # Build the figure without pyplot: pyplot keeps a global reference to
    # every figure it creates, which leaks memory in a long-running server.
    fig = Figure(figsize=(9, 7))
    axs = np.ravel(fig.subplots(2, 2))

    candidates = estimators + [("Stacking Regressor", stacking_regressor)]
    for ax, (name, est) in zip(axs, candidates):
        # Only the cross_validate call is timed, not the prediction pass.
        start_time = time.perf_counter()
        cv_results = cross_validate(
            est, X, y, scoring=list(scorers.values()), n_jobs=-1, verbose=0
        )
        elapsed_time = time.perf_counter() - start_time

        y_pred = cross_val_predict(est, X, y, n_jobs=-1, verbose=0)

        # abs() because the MAE scorer is negated.
        score_labels = {
            key: (
                f"{np.abs(np.mean(cv_results[f'test_{value}'])):.2f} +- "
                f"{np.std(cv_results[f'test_{value}']):.2f}"
            )
            for key, value in scorers.items()
        }

        PredictionErrorDisplay.from_predictions(
            y_true=y,
            y_pred=y_pred,
            kind="actual_vs_predicted",
            ax=ax,
            scatter_kwargs={"alpha": 0.2, "color": "tab:blue"},
            line_kwargs={"color": "tab:red"},
        )
        ax.set_title(f"{name}\nEvaluation in {elapsed_time:.2f} seconds")

        # Invisible artists whose only purpose is to put the scores in the legend.
        for metric, score in score_labels.items():
            ax.plot([], [], " ", label=f"{metric}: {score}")
        ax.legend(loc="upper left")

    fig.suptitle("Single predictor versus stacked predictors")
    fig.tight_layout()
    fig.subplots_adjust(top=0.9)  # leave room for the suptitle
    return fig
title = "Combine predictors using stacking"

# Markdown blurb shown under the page heading.
description = """
This app demonstrates combining 3 predictors trained on Ames housing dataset from OpenML using stacking and Ridge estimator as final estimator.
Stacking uses a meta-learning algorithm to learn how to combine the predictions from trained models.
The OpenML Ames housing dataset is a processed version of the 'Ames Iowa Housing' with 81 features.
This app is developed based on [scikit-learn example](https://scikit-learn.org/stable/auto_examples/ensemble/plot_stack_predictors.html#sphx-glr-auto-examples-ensemble-plot-stack-predictors-py)
"""

# Page layout: heading, description, three repo-id inputs, the output plot,
# and a button that runs stacked_model on the three inputs.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    model1 = gr.Textbox(
        label="Repo id of first model",
        value="haizad/ames-housing-random-forest-predictor",
    )
    model2 = gr.Textbox(
        label="Repo id of second model",
        value="haizad/ames-housing-gbdt-predictor",
    )
    model3 = gr.Textbox(
        label="Repo id of third model",
        value="haizad/ames-housing-lasso-predictor",
    )

    plot = gr.Plot(label="Comparison of single predictor against stacked predictor")

    stack_btn = gr.Button("Stack")
    stack_btn.click(
        fn=stacked_model,
        inputs=[model1, model2, model3],
        outputs=[plot],
    )

demo.launch()