Spaces:
Sleeping
Sleeping
GMARTINEZMILLA
committed on
Commit
•
9f5e05c
1
Parent(s):
4508fcb
bugfix: added import lgbm
Browse files
app.py
CHANGED
@@ -104,6 +104,143 @@ if page == "":
|
|
104 |
st.write("Use the dropdown menu to navigate between the different sections.")
|
105 |
|
106 |
# Customer Analysis Page
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
elif page == "Customer Analysis":
|
108 |
st.title("Customer Analysis")
|
109 |
st.markdown("Use the tools below to explore your customer data.")
|
@@ -130,34 +267,64 @@ elif page == "Customer Analysis":
|
|
130 |
gbm = lgb.Booster(model_file=model_path)
|
131 |
st.write(f"Loaded model for cluster {cluster}")
|
132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
# Load X_predict for that cluster
|
134 |
X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
|
|
|
|
|
|
|
135 |
|
136 |
# Filter for the specific customer
|
137 |
X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
|
|
|
|
|
|
|
138 |
|
139 |
if not X_cliente.empty:
|
140 |
# Prepare data for prediction
|
141 |
features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
|
|
|
|
|
|
|
142 |
|
143 |
# Make Prediction for the selected customer
|
144 |
y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
# Reassemble the results
|
147 |
results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
|
148 |
results['ventas_predichas'] = y_pred
|
|
|
|
|
|
|
149 |
|
150 |
st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
|
151 |
|
152 |
# Load actual data
|
153 |
df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
|
154 |
actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
|
|
|
|
|
|
|
155 |
|
156 |
if not actual_sales.empty:
|
157 |
results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
|
158 |
on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
|
159 |
how='left')
|
160 |
results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
|
|
|
|
|
|
|
161 |
|
162 |
# Calculate metrics only for non-null actual sales
|
163 |
valid_results = results.dropna(subset=['ventas_reales'])
|
@@ -241,6 +408,7 @@ elif page == "Customer Analysis":
|
|
241 |
else:
|
242 |
st.warning("Please select a customer.")
|
243 |
|
|
|
244 |
# Articles Recommendations Page
|
245 |
elif page == "Articles Recommendations":
|
246 |
st.title("Articles Recommendations")
|
|
|
104 |
st.write("Use the dropdown menu to navigate between the different sections.")
|
105 |
|
106 |
# Customer Analysis Page
|
107 |
+
# elif page == "Customer Analysis":
|
108 |
+
# st.title("Customer Analysis")
|
109 |
+
# st.markdown("Use the tools below to explore your customer data.")
|
110 |
+
|
111 |
+
# partial_code = st.text_input("Enter part of Customer Code (or leave empty to see all)")
|
112 |
+
# if partial_code:
|
113 |
+
# filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
|
114 |
+
# else:
|
115 |
+
# filtered_customers = df
|
116 |
+
# customer_list = filtered_customers['CLIENTE'].unique()
|
117 |
+
# customer_code = st.selectbox("Select Customer Code", customer_list)
|
118 |
+
|
119 |
+
# if st.button("Calcular"):
|
120 |
+
# if customer_code:
|
121 |
+
# # Find Customer's Cluster
|
122 |
+
# customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
|
123 |
+
|
124 |
+
# if not customer_match.empty:
|
125 |
+
# cluster = customer_match['cluster_id'].values[0]
|
126 |
+
# st.write(f"Customer {customer_code} belongs to cluster {cluster}")
|
127 |
+
|
128 |
+
# # Load the Corresponding Model
|
129 |
+
# model_path = f'models/modelo_cluster_{cluster}.txt'
|
130 |
+
# gbm = lgb.Booster(model_file=model_path)
|
131 |
+
# st.write(f"Loaded model for cluster {cluster}")
|
132 |
+
|
133 |
+
# # Load X_predict for that cluster
|
134 |
+
# X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
|
135 |
+
|
136 |
+
# # Filter for the specific customer
|
137 |
+
# X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
|
138 |
+
|
139 |
+
# if not X_cliente.empty:
|
140 |
+
# # Prepare data for prediction
|
141 |
+
# features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
|
142 |
+
|
143 |
+
# # Make Prediction for the selected customer
|
144 |
+
# y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
|
145 |
+
|
146 |
+
# # Reassemble the results
|
147 |
+
# results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
|
148 |
+
# results['ventas_predichas'] = y_pred
|
149 |
+
|
150 |
+
# st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
|
151 |
+
|
152 |
+
# # Load actual data
|
153 |
+
# df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
|
154 |
+
# actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
|
155 |
+
|
156 |
+
# if not actual_sales.empty:
|
157 |
+
# results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
|
158 |
+
# on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
|
159 |
+
# how='left')
|
160 |
+
# results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
|
161 |
+
|
162 |
+
# # Calculate metrics only for non-null actual sales
|
163 |
+
# valid_results = results.dropna(subset=['ventas_reales'])
|
164 |
+
# if not valid_results.empty:
|
165 |
+
# mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
|
166 |
+
# mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
|
167 |
+
# rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
|
168 |
+
|
169 |
+
# st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
|
170 |
+
# st.write(f"MAE: {mae:.2f}")
|
171 |
+
# st.write(f"MAPE: {mape:.2f}%")
|
172 |
+
# st.write(f"RMSE: {rmse:.2f}")
|
173 |
+
|
174 |
+
# # Analysis of results
|
175 |
+
# threshold_good = 100 # You may want to adjust this threshold
|
176 |
+
# if mae < threshold_good:
|
177 |
+
# st.success(f"Customer {customer_code} is performing well based on the predictions.")
|
178 |
+
# else:
|
179 |
+
# st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
|
180 |
+
# else:
|
181 |
+
# st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
|
182 |
+
|
183 |
+
# # Show the radar chart
|
184 |
+
# all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
|
185 |
+
# all_manufacturers.index = all_manufacturers.index.astype(str)
|
186 |
+
|
187 |
+
# sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
|
188 |
+
# sales_data.index = sales_data.index.astype(str)
|
189 |
+
|
190 |
+
# sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
|
191 |
+
# sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
|
192 |
+
|
193 |
+
# top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
|
194 |
+
# top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
|
195 |
+
# combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
|
196 |
+
# combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
|
197 |
+
|
198 |
+
# combined_data = pd.DataFrame({
|
199 |
+
# 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
|
200 |
+
# 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
|
201 |
+
# }).fillna(0)
|
202 |
+
|
203 |
+
# combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
|
204 |
+
# non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
|
205 |
+
|
206 |
+
# if len(non_zero_manufacturers) < 3:
|
207 |
+
# zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
|
208 |
+
# manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
|
209 |
+
# else:
|
210 |
+
# manufacturers_to_show = non_zero_manufacturers
|
211 |
+
|
212 |
+
# values = manufacturers_to_show['units'].tolist()
|
213 |
+
# amounts = manufacturers_to_show['sales'].tolist()
|
214 |
+
# manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
|
215 |
+
|
216 |
+
# st.write(f"### Results for top {len(manufacturers)} manufacturers:")
|
217 |
+
# for manufacturer, value, amount in zip(manufacturers, values, amounts):
|
218 |
+
# st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
|
219 |
+
|
220 |
+
# if manufacturers:
|
221 |
+
# fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
|
222 |
+
# st.pyplot(fig)
|
223 |
+
# else:
|
224 |
+
# st.warning("No data available to create the radar chart.")
|
225 |
+
|
226 |
+
# # Show sales over the years graph
|
227 |
+
# sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
|
228 |
+
# if all(col in ventas_clientes.columns for col in sales_columns):
|
229 |
+
# years = ['2021', '2022', '2023']
|
230 |
+
# customer_sales = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code][sales_columns].values[0]
|
231 |
+
|
232 |
+
# fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
|
233 |
+
# fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
|
234 |
+
# st.plotly_chart(fig_sales)
|
235 |
+
# else:
|
236 |
+
# st.warning("Sales data for 2021-2023 not available.")
|
237 |
+
# else:
|
238 |
+
# st.warning(f"No prediction data found for customer {customer_code}.")
|
239 |
+
# else:
|
240 |
+
# st.warning(f"No data found for customer {customer_code}. Please check the code.")
|
241 |
+
# else:
|
242 |
+
# st.warning("Please select a customer.")
|
243 |
+
|
244 |
elif page == "Customer Analysis":
|
245 |
st.title("Customer Analysis")
|
246 |
st.markdown("Use the tools below to explore your customer data.")
|
|
|
267 |
gbm = lgb.Booster(model_file=model_path)
|
268 |
st.write(f"Loaded model for cluster {cluster}")
|
269 |
|
270 |
+
# Inspect the model
|
271 |
+
st.write("### Model Information:")
|
272 |
+
st.write(f"Number of trees: {gbm.num_trees()}")
|
273 |
+
st.write(f"Number of features: {gbm.num_feature()}")
|
274 |
+
st.write("Feature names:")
|
275 |
+
st.write(gbm.feature_name())
|
276 |
+
|
277 |
# Load X_predict for that cluster
|
278 |
X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
|
279 |
+
st.write("### X_predict_cluster DataFrame:")
|
280 |
+
st.write(X_predict_cluster.head())
|
281 |
+
st.write(f"Shape: {X_predict_cluster.shape}")
|
282 |
|
283 |
# Filter for the specific customer
|
284 |
X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
|
285 |
+
st.write("### X_cliente DataFrame:")
|
286 |
+
st.write(X_cliente.head())
|
287 |
+
st.write(f"Shape: {X_cliente.shape}")
|
288 |
|
289 |
if not X_cliente.empty:
|
290 |
# Prepare data for prediction
|
291 |
features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
|
292 |
+
st.write("### Features for Prediction:")
|
293 |
+
st.write(features_for_prediction.head())
|
294 |
+
st.write(f"Shape: {features_for_prediction.shape}")
|
295 |
|
296 |
# Make Prediction for the selected customer
|
297 |
y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
|
298 |
+
st.write("### Prediction Results:")
|
299 |
+
st.write(f"Type of y_pred: {type(y_pred)}")
|
300 |
+
st.write(f"Shape of y_pred: {y_pred.shape}")
|
301 |
+
st.write("First few predictions:")
|
302 |
+
st.write(y_pred[:5])
|
303 |
|
304 |
# Reassemble the results
|
305 |
results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
|
306 |
results['ventas_predichas'] = y_pred
|
307 |
+
st.write("### Results DataFrame:")
|
308 |
+
st.write(results.head())
|
309 |
+
st.write(f"Shape: {results.shape}")
|
310 |
|
311 |
st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
|
312 |
|
313 |
# Load actual data
|
314 |
df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
|
315 |
actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
|
316 |
+
st.write("### Actual Sales DataFrame:")
|
317 |
+
st.write(actual_sales.head())
|
318 |
+
st.write(f"Shape: {actual_sales.shape}")
|
319 |
|
320 |
if not actual_sales.empty:
|
321 |
results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
|
322 |
on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
|
323 |
how='left')
|
324 |
results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
|
325 |
+
st.write("### Final Results DataFrame:")
|
326 |
+
st.write(results.head())
|
327 |
+
st.write(f"Shape: {results.shape}")
|
328 |
|
329 |
# Calculate metrics only for non-null actual sales
|
330 |
valid_results = results.dropna(subset=['ventas_reales'])
|
|
|
408 |
else:
|
409 |
st.warning("Please select a customer.")
|
410 |
|
411 |
+
|
412 |
# Articles Recommendations Page
|
413 |
elif page == "Articles Recommendations":
|
414 |
st.title("Articles Recommendations")
|