|
import gradio as gr |
|
import numpy as np |
|
import matplotlib |
|
matplotlib.use("Agg") |
|
import matplotlib.pyplot as plt |
|
from sklearn.datasets import fetch_openml |
|
from sklearn.utils import shuffle |
|
from sklearn.ensemble import StackingRegressor |
|
from sklearn.linear_model import RidgeCV |
|
from skops.hub_utils import download |
|
import joblib |
|
import shutil |
|
|
|
|
|
def load_ames_housing():
    """Fetch the OpenML ``house_prices`` dataset and return a shuffled subset.

    Only the 20 columns listed below are kept. Rows are shuffled with a fixed
    seed (``random_state=0``) and the first 600 of them are retained.

    Returns:
        tuple: ``(X, y)`` where ``X`` is the selected feature frame and ``y``
        is the natural logarithm of the dataset's target.
    """
    selected_columns = [
        "YrSold",
        "HeatingQC",
        "Street",
        "YearRemodAdd",
        "Heating",
        "MasVnrType",
        "BsmtUnfSF",
        "Foundation",
        "MasVnrArea",
        "MSSubClass",
        "ExterQual",
        "Condition2",
        "GarageCars",
        "GarageType",
        "OverallQual",
        "TotalBsmtSF",
        "BsmtFinSF1",
        "HouseStyle",
        "MiscFeature",
        "MoSold",
    ]
    sample_size = 600

    dataset = fetch_openml(name="house_prices", as_frame=True, parser="pandas")

    # Shuffle features and target together so the rows stay aligned.
    features_frame, target = shuffle(
        dataset.data.loc[:, selected_columns],
        dataset.target,
        random_state=0,
    )

    features_subset = features_frame.iloc[:sample_size]
    target_subset = target.iloc[:sample_size]
    return features_subset, np.log(target_subset)
|
|
|
def _load_hub_pipeline(repo_id):
    """Download ``model.pkl`` from the Hub repo ``repo_id`` and unpickle it."""
    import os
    import tempfile

    # A fresh directory per call avoids collisions between concurrent requests
    # and is removed even when the download or the unpickling raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        # Hand skops a path that does not exist yet, so the call does not
        # depend on how it treats a pre-existing destination directory.
        model_dir = os.path.join(scratch_dir, "model")
        download(repo_id=repo_id, dst=model_dir)
        # SECURITY: joblib.load unpickles the file, which can execute
        # arbitrary code. The repo id comes from a user-editable textbox, so
        # only expose this app where loading untrusted repos is acceptable.
        return joblib.load(os.path.join(model_dir, "model.pkl"))


def stacked_model(model1, model2, model3):
    """Compare three Hub-hosted regressors with a stack built on top of them.

    Each repo id is downloaded and unpickled, the three resulting estimators
    are combined in a ``StackingRegressor`` with a ``RidgeCV`` final
    estimator, and all four models are cross-validated on the data returned
    by ``load_ames_housing``.

    Args:
        model1: Hub repo id of the first base model.
        model2: Hub repo id of the second base model.
        model3: Hub repo id of the third base model.

    Returns:
        A matplotlib figure with a 2x2 grid of actual-vs-predicted plots, one
        per model, each annotated with its R2 and MAE scores and the time
        spent in ``cross_validate``.
    """
    import time

    from matplotlib.figure import Figure
    from sklearn.metrics import PredictionErrorDisplay
    from sklearn.model_selection import cross_validate, cross_val_predict

    # Tolerate stray whitespace pasted into the textboxes.
    repo_ids = [repo_id.strip() for repo_id in (model1, model2, model3)]

    X, y = load_ames_housing()

    # Each estimator is named after the last path component of its repo id.
    estimators = [
        (repo_id.split("/")[-1], _load_hub_pipeline(repo_id))
        for repo_id in repo_ids
    ]
    stacking_regressor = StackingRegressor(
        estimators=estimators, final_estimator=RidgeCV()
    )

    # Build the figure without pyplot: pyplot keeps a global reference to
    # every figure it creates, which leaks one figure per request in a
    # long-running server.
    fig = Figure(figsize=(9, 7))
    axs = np.ravel(fig.subplots(2, 2))

    scorers = {"R2": "r2", "MAE": "neg_mean_absolute_error"}
    candidates = estimators + [("Stacking Regressor", stacking_regressor)]

    for ax, (name, est) in zip(axs, candidates):
        # Only the scoring run is timed; the prediction run below is not.
        start_time = time.perf_counter()
        cv_results = cross_validate(
            est, X, y, scoring=list(scorers.values()), n_jobs=-1, verbose=0
        )
        elapsed_time = time.perf_counter() - start_time

        y_pred = cross_val_predict(est, X, y, n_jobs=-1, verbose=0)

        # MAE is reported by scikit-learn as a negated score, hence the abs().
        score_labels = {
            label: (
                f"{np.abs(np.mean(cv_results[f'test_{scorer}'])):.2f} +- "
                f"{np.std(cv_results[f'test_{scorer}']):.2f}"
            )
            for label, scorer in scorers.items()
        }

        PredictionErrorDisplay.from_predictions(
            y_true=y,
            y_pred=y_pred,
            kind="actual_vs_predicted",
            ax=ax,
            scatter_kwargs={"alpha": 0.2, "color": "tab:blue"},
            line_kwargs={"color": "tab:red"},
        )
        ax.set_title(f"{name}\nEvaluation in {elapsed_time:.2f} seconds")

        # Empty plots serve only to put the score text into the legend.
        for metric, summary in score_labels.items():
            ax.plot([], [], " ", label=f"{metric}: {summary}")
        ax.legend(loc="upper left")

    fig.suptitle("Single predictor versus stacked predictors")
    fig.tight_layout()
    fig.subplots_adjust(top=0.9)
    return fig
|
|
|
# Gradio front end: three repo-id textboxes feed stacked_model, and the
# figure it returns is rendered in the plot component.
title = "Combine predictors using stacking"

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(
        """
    This app demonstrates combining 3 predictors trained on Ames housing dataset from OpenML using stacking and Ridge estimator as final estimator.
    Stacking uses a meta-learning algorithm to learn how to combine the predictions from trained models.
    The OpenML Ames housing dataset is a processed version of the 'Ames Iowa Housing' with 81 features.
    This app is developed based on [scikit-learn example](https://scikit-learn.org/stable/auto_examples/ensemble/plot_stack_predictors.html#sphx-glr-auto-examples-ensemble-plot-stack-predictors-py)
    """
    )

    # Inputs: one Hub repo id per base model, pre-filled with defaults.
    model1 = gr.Textbox(
        label="Repo id of first model",
        value="haizad/ames-housing-random-forest-predictor",
    )
    model2 = gr.Textbox(
        label="Repo id of second model",
        value="haizad/ames-housing-gbdt-predictor",
    )
    model3 = gr.Textbox(
        label="Repo id of third model",
        value="haizad/ames-housing-lasso-predictor",
    )

    # Output and trigger.
    plot = gr.Plot(label="Comparison of single predictor against stacked predictor")
    stack_btn = gr.Button("Stack")
    stack_btn.click(
        fn=stacked_model,
        inputs=[model1, model2, model3],
        outputs=[plot],
    )

demo.launch()
|
|
|
|