Spaces:
Sleeping
Sleeping
GMARTINEZMILLA
committed on
Commit
•
ddf19f6
1
Parent(s):
4cdd823
bugfix: added import lgbm
Browse files
app.py
CHANGED
@@ -125,7 +125,7 @@ elif page == "Customer Analysis":
|
|
125 |
if not customer_data.empty and not customer_euros.empty:
|
126 |
st.write(f"### Analysis for Customer {customer_code}")
|
127 |
|
128 |
-
#
|
129 |
customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
|
130 |
|
131 |
if not customer_match.empty:
|
@@ -135,44 +135,44 @@ elif page == "Customer Analysis":
|
|
135 |
st.error(f"Customer {customer_code} not found in customer_clusters.")
|
136 |
st.stop() # Stop further execution if no cluster is found
|
137 |
|
138 |
-
#
|
139 |
model_path = f'models/modelo_cluster_{cluster}.txt'
|
140 |
gbm = lgb.Booster(model_file=model_path)
|
141 |
st.write(f"Loaded model for cluster {cluster}")
|
142 |
|
143 |
-
#
|
144 |
X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
|
145 |
X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
|
146 |
|
147 |
if not X_cliente.empty:
|
148 |
-
#
|
149 |
y_pred = gbm.predict(X_cliente.drop(columns=['cliente_id']), num_iteration=gbm.best_iteration)
|
150 |
-
st.write(f"Predicted sales for Customer {customer_code}: {y_pred
|
151 |
|
152 |
-
#
|
153 |
df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
|
154 |
actual_sales = df_agg_2024[(df_agg_2024['cliente_id'] == customer_code) & (df_agg_2024['marca_id_encoded'].isin(X_cliente['marca_id_encoded']))]
|
155 |
if not actual_sales.empty:
|
156 |
-
merged_data = pd.
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
# Calculate metrics (MAE, MAPE, RMSE, SMAPE)
|
165 |
mae = mean_absolute_error(merged_data['ventas_reales'], merged_data['ventas_predichas'])
|
166 |
mape = np.mean(np.abs((merged_data['ventas_reales'] - merged_data['ventas_predichas']) / merged_data['ventas_reales'])) * 100
|
167 |
rmse = np.sqrt(mean_squared_error(merged_data['ventas_reales'], merged_data['ventas_predichas']))
|
168 |
smape_value = smape(merged_data['ventas_reales'], merged_data['ventas_predichas'])
|
169 |
|
|
|
170 |
st.write(f"MAE: {mae:.2f}")
|
171 |
st.write(f"MAPE: {mape:.2f}%")
|
172 |
st.write(f"RMSE: {rmse:.2f}")
|
173 |
st.write(f"SMAPE: {smape_value:.2f}%")
|
174 |
|
175 |
-
#
|
|
|
176 |
if mae < threshold_good:
|
177 |
st.success(f"Customer {customer_code} is performing well based on the predictions.")
|
178 |
else:
|
|
|
125 |
if not customer_data.empty and not customer_euros.empty:
|
126 |
st.write(f"### Analysis for Customer {customer_code}")
|
127 |
|
128 |
+
# Find Customer's Cluster
|
129 |
customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
|
130 |
|
131 |
if not customer_match.empty:
|
|
|
135 |
st.error(f"Customer {customer_code} not found in customer_clusters.")
|
136 |
st.stop() # Stop further execution if no cluster is found
|
137 |
|
138 |
+
# Load the Corresponding Model
|
139 |
model_path = f'models/modelo_cluster_{cluster}.txt'
|
140 |
gbm = lgb.Booster(model_file=model_path)
|
141 |
st.write(f"Loaded model for cluster {cluster}")
|
142 |
|
143 |
+
# Load X_predict for that cluster and extract customer-specific data
|
144 |
X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
|
145 |
X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
|
146 |
|
147 |
if not X_cliente.empty:
|
148 |
+
# Make Prediction for the selected customer
|
149 |
y_pred = gbm.predict(X_cliente.drop(columns=['cliente_id']), num_iteration=gbm.best_iteration)
|
150 |
+
st.write(f"Predicted sales for Customer {customer_code}: {y_pred.sum():.2f}")
|
151 |
|
152 |
+
# Merge with actual data from df_agg_2024
|
153 |
df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
|
154 |
actual_sales = df_agg_2024[(df_agg_2024['cliente_id'] == customer_code) & (df_agg_2024['marca_id_encoded'].isin(X_cliente['marca_id_encoded']))]
|
155 |
if not actual_sales.empty:
|
156 |
+
merged_data = pd.DataFrame({
|
157 |
+
'cliente_id': [customer_code],
|
158 |
+
'ventas_predichas': [y_pred.sum()],
|
159 |
+
'ventas_reales': [actual_sales['precio_total'].sum()]
|
160 |
+
})
|
161 |
+
|
162 |
+
# Calculate metrics
|
|
|
|
|
163 |
mae = mean_absolute_error(merged_data['ventas_reales'], merged_data['ventas_predichas'])
|
164 |
mape = np.mean(np.abs((merged_data['ventas_reales'] - merged_data['ventas_predichas']) / merged_data['ventas_reales'])) * 100
|
165 |
rmse = np.sqrt(mean_squared_error(merged_data['ventas_reales'], merged_data['ventas_predichas']))
|
166 |
smape_value = smape(merged_data['ventas_reales'], merged_data['ventas_predichas'])
|
167 |
|
168 |
+
st.write(f"Actual sales for Customer {customer_code}: {merged_data['ventas_reales'].values[0]:.2f}")
|
169 |
st.write(f"MAE: {mae:.2f}")
|
170 |
st.write(f"MAPE: {mape:.2f}%")
|
171 |
st.write(f"RMSE: {rmse:.2f}")
|
172 |
st.write(f"SMAPE: {smape_value:.2f}%")
|
173 |
|
174 |
+
# Analysis of results
|
175 |
+
threshold_good = 100 # You may want to adjust this threshold
|
176 |
if mae < threshold_good:
|
177 |
st.success(f"Customer {customer_code} is performing well based on the predictions.")
|
178 |
else:
|