# Page-header residue from the hosting site: "Spaces: Sleeping"
import pickle

import pandas as pd
import plotly.express as px
import streamlit as st
from pandas_profiling import ProfileReport
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
# Maps a lower-cased file extension to the pandas reader that parses it.
# The keys mirror the extensions accepted by the file uploader in main().
DATASET_READERS = {
    "csv": pd.read_csv,
    "xlsx": pd.read_excel,
    "json": pd.read_json,
}


def load_dataset(uploaded_file):
    """Parse an uploaded file into a DataFrame based on its extension.

    Args:
        uploaded_file: A file-like object exposing a ``name`` attribute
            (for example the object returned by ``st.file_uploader``).

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If the file extension has no registered reader.
    """
    extension = uploaded_file.name.rpartition(".")[2].lower()
    try:
        reader = DATASET_READERS[extension]
    except KeyError:
        raise ValueError(
            f"Unsupported file type: {uploaded_file.name!r}"
        ) from None
    return reader(uploaded_file)


def fill_missing_values(df, strategy):
    """Return a copy of *df* with missing values filled.

    Args:
        df: The input DataFrame; it is not modified.
        strategy: ``"Mean"`` or ``"Median"`` fill numeric columns with the
            column mean/median. ``"Imputation"`` fills every column with its
            most frequent value.
            NOTE(review): the original script never defined what
            "Imputation" means; most-frequent-value is an assumption.

    Returns:
        A new DataFrame with the fill applied.

    Raises:
        ValueError: If *strategy* is not one of the supported names.
    """
    if strategy == "Mean":
        fill_values = df.select_dtypes(include="number").mean()
    elif strategy == "Median":
        fill_values = df.select_dtypes(include="number").median()
    elif strategy == "Imputation":
        modes = df.mode(dropna=True)
        if modes.empty:
            # Nothing to derive a fill value from (no rows or no columns).
            return df.copy()
        fill_values = modes.iloc[0]
    else:
        raise ValueError(f"Unknown missing-value strategy: {strategy!r}")
    # fillna with a Series matches fill values to columns by label.
    return df.fillna(fill_values)


def treat_outliers(features, method):
    """Return a copy of *features* with numeric columns rescaled or clipped.

    Args:
        features: The feature DataFrame; it is not modified.
        method: ``"Standardization"`` converts numeric columns to z-scores.
            ``"Winsorization"`` clips numeric columns to their 5th and 95th
            percentiles.
            NOTE(review): the percentile bounds are an assumption; the
            original script collected this choice but never applied it.

    Returns:
        A new DataFrame; non-numeric columns are left untouched.

    Raises:
        ValueError: If *method* is not one of the supported names.
    """
    if method not in ("Standardization", "Winsorization"):
        raise ValueError(f"Unknown outlier method: {method!r}")

    treated = features.copy()
    numeric_cols = list(treated.select_dtypes(include="number").columns)
    if not numeric_cols:
        return treated

    numeric = treated[numeric_cols]
    if method == "Standardization":
        # A constant column has zero spread; divide by 1 so it maps to 0
        # instead of NaN/inf.
        spread = numeric.std(ddof=0).replace(0, 1)
        treated[numeric_cols] = (numeric - numeric.mean()) / spread
    else:
        treated[numeric_cols] = numeric.clip(
            lower=numeric.quantile(0.05),
            upper=numeric.quantile(0.95),
            axis=1,
        )
    return treated


def collect_hyperparams(model_type):
    """Render the hyperparameter slider for *model_type* and return its value.

    Returns:
        A dict of keyword arguments for the matching estimator constructor.
    """
    if model_type == "Linear Regression":
        return {"alpha": st.slider("Regularization strength:", 0.1, 10.0)}
    if model_type == "Decision Trees":
        return {"max_depth": st.slider("Maximum depth:", 1, 10)}
    if model_type == "Random Forest":
        return {"n_estimators": st.slider("Number of estimators:", 10, 100)}
    return {}


def build_model(model_type, hyperparams):
    """Instantiate the regressor selected in the UI.

    "Linear Regression" is backed by ``Ridge`` because the UI exposes a
    regularization strength (``alpha``), which plain ``LinearRegression``
    does not accept.

    Raises:
        KeyError: If *model_type* is not a known model name.
    """
    constructors = {
        "Linear Regression": Ridge,
        "Decision Trees": DecisionTreeRegressor,
        "Random Forest": RandomForestRegressor,
    }
    return constructors[model_type](**hyperparams)


def show_profile(df):
    """Generate the profiling report for *df* and embed it as HTML."""
    # Imported here so the HTML component is in scope regardless of how the
    # top-level streamlit package exposes its submodules.
    import streamlit.components.v1 as components

    profile = ProfileReport(df, title="Data Profiling Report")
    # st.write cannot render a ProfileReport; embed its HTML export instead.
    components.html(profile.to_html(), height=600, scrolling=True)


def main():
    """Run the app: upload, profile, clean, train, evaluate and export."""
    # Data Ingestion
    uploaded_file = st.file_uploader(
        "Upload your dataset:", type=["csv", "xlsx", "json"]
    )
    if not uploaded_file:
        return
    try:
        df = load_dataset(uploaded_file)
    except ValueError as exc:
        # Covers unsupported extensions as well as pandas parse errors,
        # which derive from ValueError.
        st.error(f"Could not read the uploaded file: {exc}")
        return
    st.write("Data Preview:")
    st.write(df.head())

    # Data Preparation
    st.write("Data Preparation:")
    show_profile(df)

    # Data Cleaning
    st.write("Data Cleaning:")
    handle_missing_values = st.selectbox(
        "Handle missing values:", ["Mean", "Median", "Imputation"]
    )
    handle_outliers = st.selectbox(
        "Handle outliers:", ["Standardization", "Winsorization"]
    )
    df = fill_missing_values(df, handle_missing_values)

    # Model Training
    st.write("Model Training:")
    columns = list(df.columns)
    if len(columns) < 2 or len(df) < 2:
        st.error("The dataset needs at least two columns and two rows.")
        return
    # Keep the original default of a column named "target" when it exists;
    # otherwise fall back to the last column.
    default_target = (
        columns.index("target") if "target" in columns else len(columns) - 1
    )
    target_column = st.selectbox(
        "Target column:", columns, index=default_target
    )
    if not pd.api.types.is_numeric_dtype(df[target_column]):
        st.error("The target column must be numeric for regression.")
        return

    model_type = st.selectbox(
        "Choose a model:",
        ["Linear Regression", "Decision Trees", "Random Forest"],
    )
    hyperparams = collect_hyperparams(model_type)

    # The regressors only accept numeric input, so non-numeric feature
    # columns are left out of training.
    features = df.drop(columns=[target_column]).select_dtypes(include="number")
    if features.shape[1] == 0:
        st.error("No numeric feature columns are available for training.")
        return
    # Outlier treatment is applied to features only, so the target keeps
    # its original units.
    features = treat_outliers(features, handle_outliers)

    X_train, X_test, y_train, y_test = train_test_split(
        features, df[target_column], test_size=0.2, random_state=42
    )
    model = build_model(model_type, hyperparams)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Model Evaluation
    st.write("Model Evaluation:")
    # For regressors, score() returns the coefficient of determination.
    st.write("R² score:", model.score(X_test, y_test))
    st.write("Predicted vs. Actual:")
    fig = px.scatter(
        x=y_test.to_numpy(),
        y=y_pred,
        labels={"x": "Actual", "y": "Predicted"},
    )
    st.plotly_chart(fig, use_container_width=True)

    # Model Deployment
    st.write("Model Deployment:")
    # download_button needs bytes, not an estimator object. Pickle files
    # should only be loaded back from trusted sources.
    st.download_button(
        "Download trained model",
        data=pickle.dumps(model),
        file_name="model.pkl",
        mime="application/octet-stream",
    )
    if st.button("Deploy to cloud platform"):
        st.info("Cloud deployment is not implemented yet.")


# `streamlit run` executes the script with __name__ set to "__main__".
if __name__ == "__main__":
    main()