GMARTINEZMILLA committed on
Commit
9f5e05c
1 Parent(s): 4508fcb

bugfix: added import lgbm

Browse files
Files changed (1) hide show
  1. app.py +168 -0
app.py CHANGED
@@ -104,6 +104,143 @@ if page == "":
104
  st.write("Use the dropdown menu to navigate between the different sections.")
105
 
106
  # Customer Analysis Page
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  elif page == "Customer Analysis":
108
  st.title("Customer Analysis")
109
  st.markdown("Use the tools below to explore your customer data.")
@@ -130,34 +267,64 @@ elif page == "Customer Analysis":
130
  gbm = lgb.Booster(model_file=model_path)
131
  st.write(f"Loaded model for cluster {cluster}")
132
 
 
 
 
 
 
 
 
133
  # Load X_predict for that cluster
134
  X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
 
 
 
135
 
136
  # Filter for the specific customer
137
  X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
 
 
 
138
 
139
  if not X_cliente.empty:
140
  # Prepare data for prediction
141
  features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
 
 
 
142
 
143
  # Make Prediction for the selected customer
144
  y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
 
 
 
 
 
145
 
146
  # Reassemble the results
147
  results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
148
  results['ventas_predichas'] = y_pred
 
 
 
149
 
150
  st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
151
 
152
  # Load actual data
153
  df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
154
  actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
 
 
 
155
 
156
  if not actual_sales.empty:
157
  results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
158
  on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
159
  how='left')
160
  results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
 
 
 
161
 
162
  # Calculate metrics only for non-null actual sales
163
  valid_results = results.dropna(subset=['ventas_reales'])
@@ -241,6 +408,7 @@ elif page == "Customer Analysis":
241
  else:
242
  st.warning("Please select a customer.")
243
 
 
244
  # Articles Recommendations Page
245
  elif page == "Articles Recommendations":
246
  st.title("Articles Recommendations")
 
104
  st.write("Use the dropdown menu to navigate between the different sections.")
105
 
106
  # Customer Analysis Page
107
+ # elif page == "Customer Analysis":
108
+ # st.title("Customer Analysis")
109
+ # st.markdown("Use the tools below to explore your customer data.")
110
+
111
+ # partial_code = st.text_input("Enter part of Customer Code (or leave empty to see all)")
112
+ # if partial_code:
113
+ # filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
114
+ # else:
115
+ # filtered_customers = df
116
+ # customer_list = filtered_customers['CLIENTE'].unique()
117
+ # customer_code = st.selectbox("Select Customer Code", customer_list)
118
+
119
+ # if st.button("Calcular"):
120
+ # if customer_code:
121
+ # # Find Customer's Cluster
122
+ # customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
123
+
124
+ # if not customer_match.empty:
125
+ # cluster = customer_match['cluster_id'].values[0]
126
+ # st.write(f"Customer {customer_code} belongs to cluster {cluster}")
127
+
128
+ # # Load the Corresponding Model
129
+ # model_path = f'models/modelo_cluster_{cluster}.txt'
130
+ # gbm = lgb.Booster(model_file=model_path)
131
+ # st.write(f"Loaded model for cluster {cluster}")
132
+
133
+ # # Load X_predict for that cluster
134
+ # X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
135
+
136
+ # # Filter for the specific customer
137
+ # X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
138
+
139
+ # if not X_cliente.empty:
140
+ # # Prepare data for prediction
141
+ # features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
142
+
143
+ # # Make Prediction for the selected customer
144
+ # y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
145
+
146
+ # # Reassemble the results
147
+ # results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
148
+ # results['ventas_predichas'] = y_pred
149
+
150
+ # st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
151
+
152
+ # # Load actual data
153
+ # df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
154
+ # actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
155
+
156
+ # if not actual_sales.empty:
157
+ # results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
158
+ # on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
159
+ # how='left')
160
+ # results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
161
+
162
+ # # Calculate metrics only for non-null actual sales
163
+ # valid_results = results.dropna(subset=['ventas_reales'])
164
+ # if not valid_results.empty:
165
+ # mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
166
+ # mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
167
+ # rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
168
+
169
+ # st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
170
+ # st.write(f"MAE: {mae:.2f}")
171
+ # st.write(f"MAPE: {mape:.2f}%")
172
+ # st.write(f"RMSE: {rmse:.2f}")
173
+
174
+ # # Analysis of results
175
+ # threshold_good = 100 # You may want to adjust this threshold
176
+ # if mae < threshold_good:
177
+ # st.success(f"Customer {customer_code} is performing well based on the predictions.")
178
+ # else:
179
+ # st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
180
+ # else:
181
+ # st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
182
+
183
+ # # Show the radar chart
184
+ # all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
185
+ # all_manufacturers.index = all_manufacturers.index.astype(str)
186
+
187
+ # sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
188
+ # sales_data.index = sales_data.index.astype(str)
189
+
190
+ # sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
191
+ # sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
192
+
193
+ # top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
194
+ # top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
195
+ # combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
196
+ # combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
197
+
198
+ # combined_data = pd.DataFrame({
199
+ # 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
200
+ # 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
201
+ # }).fillna(0)
202
+
203
+ # combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
204
+ # non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
205
+
206
+ # if len(non_zero_manufacturers) < 3:
207
+ # zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
208
+ # manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
209
+ # else:
210
+ # manufacturers_to_show = non_zero_manufacturers
211
+
212
+ # values = manufacturers_to_show['units'].tolist()
213
+ # amounts = manufacturers_to_show['sales'].tolist()
214
+ # manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
215
+
216
+ # st.write(f"### Results for top {len(manufacturers)} manufacturers:")
217
+ # for manufacturer, value, amount in zip(manufacturers, values, amounts):
218
+ # st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
219
+
220
+ # if manufacturers:
221
+ # fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
222
+ # st.pyplot(fig)
223
+ # else:
224
+ # st.warning("No data available to create the radar chart.")
225
+
226
+ # # Show sales over the years graph
227
+ # sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
228
+ # if all(col in ventas_clientes.columns for col in sales_columns):
229
+ # years = ['2021', '2022', '2023']
230
+ # customer_sales = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code][sales_columns].values[0]
231
+
232
+ # fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
233
+ # fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
234
+ # st.plotly_chart(fig_sales)
235
+ # else:
236
+ # st.warning("Sales data for 2021-2023 not available.")
237
+ # else:
238
+ # st.warning(f"No prediction data found for customer {customer_code}.")
239
+ # else:
240
+ # st.warning(f"No data found for customer {customer_code}. Please check the code.")
241
+ # else:
242
+ # st.warning("Please select a customer.")
243
+
244
  elif page == "Customer Analysis":
245
  st.title("Customer Analysis")
246
  st.markdown("Use the tools below to explore your customer data.")
 
267
  gbm = lgb.Booster(model_file=model_path)
268
  st.write(f"Loaded model for cluster {cluster}")
269
 
270
+ # Inspect the model
271
+ st.write("### Model Information:")
272
+ st.write(f"Number of trees: {gbm.num_trees()}")
273
+ st.write(f"Number of features: {gbm.num_feature()}")
274
+ st.write("Feature names:")
275
+ st.write(gbm.feature_name())
276
+
277
  # Load X_predict for that cluster
278
  X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
279
+ st.write("### X_predict_cluster DataFrame:")
280
+ st.write(X_predict_cluster.head())
281
+ st.write(f"Shape: {X_predict_cluster.shape}")
282
 
283
  # Filter for the specific customer
284
  X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
285
+ st.write("### X_cliente DataFrame:")
286
+ st.write(X_cliente.head())
287
+ st.write(f"Shape: {X_cliente.shape}")
288
 
289
  if not X_cliente.empty:
290
  # Prepare data for prediction
291
  features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
292
+ st.write("### Features for Prediction:")
293
+ st.write(features_for_prediction.head())
294
+ st.write(f"Shape: {features_for_prediction.shape}")
295
 
296
  # Make Prediction for the selected customer
297
  y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
298
+ st.write("### Prediction Results:")
299
+ st.write(f"Type of y_pred: {type(y_pred)}")
300
+ st.write(f"Shape of y_pred: {y_pred.shape}")
301
+ st.write("First few predictions:")
302
+ st.write(y_pred[:5])
303
 
304
  # Reassemble the results
305
  results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
306
  results['ventas_predichas'] = y_pred
307
+ st.write("### Results DataFrame:")
308
+ st.write(results.head())
309
+ st.write(f"Shape: {results.shape}")
310
 
311
  st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
312
 
313
  # Load actual data
314
  df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
315
  actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
316
+ st.write("### Actual Sales DataFrame:")
317
+ st.write(actual_sales.head())
318
+ st.write(f"Shape: {actual_sales.shape}")
319
 
320
  if not actual_sales.empty:
321
  results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
322
  on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
323
  how='left')
324
  results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
325
+ st.write("### Final Results DataFrame:")
326
+ st.write(results.head())
327
+ st.write(f"Shape: {results.shape}")
328
 
329
  # Calculate metrics only for non-null actual sales
330
  valid_results = results.dropna(subset=['ventas_reales'])
 
408
  else:
409
  st.warning("Please select a customer.")
410
 
411
+
412
  # Articles Recommendations Page
413
  elif page == "Articles Recommendations":
414
  st.title("Articles Recommendations")