GMARTINEZMILLA committed on
Commit
524fe4d
1 Parent(s): 6c1c89e

feat: updated app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -141
app.py CHANGED
@@ -6,16 +6,18 @@ import numpy as np
6
  import lightgbm as lgb
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
 
9
 
10
  # Page configuration
11
  st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
12
 
13
- # Load CSV files at the top, only once
14
  df = pd.read_csv("df_clean.csv")
15
  nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
16
  euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
17
  ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
18
  customer_clusters = pd.read_csv('predicts/customer_clusters.csv') # Load the customer clusters here
 
19
 
20
  # Ensure customer codes are strings
21
  df['CLIENTE'] = df['CLIENTE'].astype(str)
@@ -25,6 +27,7 @@ customer_clusters['cliente_id'] = customer_clusters['cliente_id'].astype(str) #
25
  fieles_df = pd.read_csv("clientes_relevantes.csv")
26
  cestas = pd.read_csv("cestas.csv")
27
  productos = pd.read_csv("productos.csv")
 
28
 
29
  # Convert all columns except 'CLIENTE' to float in euros_proveedor
30
  for col in euros_proveedor.columns:
@@ -103,144 +106,6 @@ if page == "":
103
  st.markdown("## Welcome to the Customer Insights App")
104
  st.write("Use the dropdown menu to navigate between the different sections.")
105
 
106
- # Customer Analysis Page
107
- # elif page == "Customer Analysis":
108
- # st.title("Customer Analysis")
109
- # st.markdown("Use the tools below to explore your customer data.")
110
-
111
- # partial_code = st.text_input("Enter part of Customer Code (or leave empty to see all)")
112
- # if partial_code:
113
- # filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
114
- # else:
115
- # filtered_customers = df
116
- # customer_list = filtered_customers['CLIENTE'].unique()
117
- # customer_code = st.selectbox("Select Customer Code", customer_list)
118
-
119
- # if st.button("Calcular"):
120
- # if customer_code:
121
- # # Find Customer's Cluster
122
- # customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
123
-
124
- # if not customer_match.empty:
125
- # cluster = customer_match['cluster_id'].values[0]
126
- # st.write(f"Customer {customer_code} belongs to cluster {cluster}")
127
-
128
- # # Load the Corresponding Model
129
- # model_path = f'models/modelo_cluster_{cluster}.txt'
130
- # gbm = lgb.Booster(model_file=model_path)
131
- # st.write(f"Loaded model for cluster {cluster}")
132
-
133
- # # Load X_predict for that cluster
134
- # X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
135
-
136
- # # Filter for the specific customer
137
- # X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
138
-
139
- # if not X_cliente.empty:
140
- # # Prepare data for prediction
141
- # features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
142
-
143
- # # Make Prediction for the selected customer
144
- # y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
145
-
146
- # # Reassemble the results
147
- # results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
148
- # results['ventas_predichas'] = y_pred
149
-
150
- # st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
151
-
152
- # # Load actual data
153
- # df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
154
- # actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
155
-
156
- # if not actual_sales.empty:
157
- # results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
158
- # on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
159
- # how='left')
160
- # results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
161
-
162
- # # Calculate metrics only for non-null actual sales
163
- # valid_results = results.dropna(subset=['ventas_reales'])
164
- # if not valid_results.empty:
165
- # mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
166
- # mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
167
- # rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
168
-
169
- # st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
170
- # st.write(f"MAE: {mae:.2f}")
171
- # st.write(f"MAPE: {mape:.2f}%")
172
- # st.write(f"RMSE: {rmse:.2f}")
173
-
174
- # # Analysis of results
175
- # threshold_good = 100 # You may want to adjust this threshold
176
- # if mae < threshold_good:
177
- # st.success(f"Customer {customer_code} is performing well based on the predictions.")
178
- # else:
179
- # st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
180
- # else:
181
- # st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
182
-
183
- # # Show the radar chart
184
- # all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
185
- # all_manufacturers.index = all_manufacturers.index.astype(str)
186
-
187
- # sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
188
- # sales_data.index = sales_data.index.astype(str)
189
-
190
- # sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
191
- # sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
192
-
193
- # top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
194
- # top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
195
- # combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
196
- # combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
197
-
198
- # combined_data = pd.DataFrame({
199
- # 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
200
- # 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
201
- # }).fillna(0)
202
-
203
- # combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
204
- # non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
205
-
206
- # if len(non_zero_manufacturers) < 3:
207
- # zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
208
- # manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
209
- # else:
210
- # manufacturers_to_show = non_zero_manufacturers
211
-
212
- # values = manufacturers_to_show['units'].tolist()
213
- # amounts = manufacturers_to_show['sales'].tolist()
214
- # manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
215
-
216
- # st.write(f"### Results for top {len(manufacturers)} manufacturers:")
217
- # for manufacturer, value, amount in zip(manufacturers, values, amounts):
218
- # st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
219
-
220
- # if manufacturers:
221
- # fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
222
- # st.pyplot(fig)
223
- # else:
224
- # st.warning("No data available to create the radar chart.")
225
-
226
- # # Show sales over the years graph
227
- # sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
228
- # if all(col in ventas_clientes.columns for col in sales_columns):
229
- # years = ['2021', '2022', '2023']
230
- # customer_sales = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code][sales_columns].values[0]
231
-
232
- # fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
233
- # fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
234
- # st.plotly_chart(fig_sales)
235
- # else:
236
- # st.warning("Sales data for 2021-2023 not available.")
237
- # else:
238
- # st.warning(f"No prediction data found for customer {customer_code}.")
239
- # else:
240
- # st.warning(f"No data found for customer {customer_code}. Please check the code.")
241
- # else:
242
- # st.warning("Please select a customer.")
243
-
244
  elif page == "Customer Analysis":
245
  st.title("Customer Analysis")
246
  st.markdown("Use the tools below to explore your customer data.")
@@ -333,8 +198,7 @@ elif page == "Customer Analysis":
333
  st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
334
 
335
  # Load actual data
336
- df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
337
- actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
338
  st.write("### Actual Sales DataFrame:")
339
  st.write(actual_sales.head())
340
  st.write(f"Shape: {actual_sales.shape}")
@@ -574,3 +438,142 @@ elif page == "Articles Recommendations":
574
  st.warning("No recommendations found for the provided basket.")
575
  else:
576
  st.warning("Please select at least one article and set its quantity.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import lightgbm as lgb
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
+ from sklearn.metrics import mean_absolute_error, mean_squared_error
10
 
11
  # Page configuration
12
  st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
13
 
14
+ # Load CSV files at the top
15
  df = pd.read_csv("df_clean.csv")
16
  nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
17
  euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
18
  ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
19
  customer_clusters = pd.read_csv('predicts/customer_clusters.csv') # Load the customer clusters here
20
+ df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
21
 
22
  # Ensure customer codes are strings
23
  df['CLIENTE'] = df['CLIENTE'].astype(str)
 
27
  fieles_df = pd.read_csv("clientes_relevantes.csv")
28
  cestas = pd.read_csv("cestas.csv")
29
  productos = pd.read_csv("productos.csv")
30
+ df_agg_2024['cliente_id'] = df_agg_2024['cliente_id'].astype(str)
31
 
32
  # Convert all columns except 'CLIENTE' to float in euros_proveedor
33
  for col in euros_proveedor.columns:
 
106
  st.markdown("## Welcome to the Customer Insights App")
107
  st.write("Use the dropdown menu to navigate between the different sections.")
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  elif page == "Customer Analysis":
110
  st.title("Customer Analysis")
111
  st.markdown("Use the tools below to explore your customer data.")
 
198
  st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
199
 
200
  # Load actual data
201
+ actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
 
202
  st.write("### Actual Sales DataFrame:")
203
  st.write(actual_sales.head())
204
  st.write(f"Shape: {actual_sales.shape}")
 
438
  st.warning("No recommendations found for the provided basket.")
439
  else:
440
  st.warning("Please select at least one article and set its quantity.")
441
+
442
+
443
+ # Customer Analysis Page
444
+ # elif page == "Customer Analysis":
445
+ # st.title("Customer Analysis")
446
+ # st.markdown("Use the tools below to explore your customer data.")
447
+
448
+ # partial_code = st.text_input("Enter part of Customer Code (or leave empty to see all)")
449
+ # if partial_code:
450
+ # filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
451
+ # else:
452
+ # filtered_customers = df
453
+ # customer_list = filtered_customers['CLIENTE'].unique()
454
+ # customer_code = st.selectbox("Select Customer Code", customer_list)
455
+
456
+ # if st.button("Calcular"):
457
+ # if customer_code:
458
+ # # Find Customer's Cluster
459
+ # customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
460
+
461
+ # if not customer_match.empty:
462
+ # cluster = customer_match['cluster_id'].values[0]
463
+ # st.write(f"Customer {customer_code} belongs to cluster {cluster}")
464
+
465
+ # # Load the Corresponding Model
466
+ # model_path = f'models/modelo_cluster_{cluster}.txt'
467
+ # gbm = lgb.Booster(model_file=model_path)
468
+ # st.write(f"Loaded model for cluster {cluster}")
469
+
470
+ # # Load X_predict for that cluster
471
+ # X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
472
+
473
+ # # Filter for the specific customer
474
+ # X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
475
+
476
+ # if not X_cliente.empty:
477
+ # # Prepare data for prediction
478
+ # features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
479
+
480
+ # # Make Prediction for the selected customer
481
+ # y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
482
+
483
+ # # Reassemble the results
484
+ # results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
485
+ # results['ventas_predichas'] = y_pred
486
+
487
+ # st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
488
+
489
+ # # Load actual data
490
+ # df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
491
+ # actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
492
+
493
+ # if not actual_sales.empty:
494
+ # results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
495
+ # on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
496
+ # how='left')
497
+ # results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
498
+
499
+ # # Calculate metrics only for non-null actual sales
500
+ # valid_results = results.dropna(subset=['ventas_reales'])
501
+ # if not valid_results.empty:
502
+ # mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
503
+ # mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
504
+ # rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
505
+
506
+ # st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
507
+ # st.write(f"MAE: {mae:.2f}")
508
+ # st.write(f"MAPE: {mape:.2f}%")
509
+ # st.write(f"RMSE: {rmse:.2f}")
510
+
511
+ # # Analysis of results
512
+ # threshold_good = 100 # You may want to adjust this threshold
513
+ # if mae < threshold_good:
514
+ # st.success(f"Customer {customer_code} is performing well based on the predictions.")
515
+ # else:
516
+ # st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
517
+ # else:
518
+ # st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
519
+
520
+ # # Show the radar chart
521
+ # all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
522
+ # all_manufacturers.index = all_manufacturers.index.astype(str)
523
+
524
+ # sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
525
+ # sales_data.index = sales_data.index.astype(str)
526
+
527
+ # sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
528
+ # sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
529
+
530
+ # top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
531
+ # top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
532
+ # combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
533
+ # combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
534
+
535
+ # combined_data = pd.DataFrame({
536
+ # 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
537
+ # 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
538
+ # }).fillna(0)
539
+
540
+ # combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
541
+ # non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
542
+
543
+ # if len(non_zero_manufacturers) < 3:
544
+ # zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
545
+ # manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
546
+ # else:
547
+ # manufacturers_to_show = non_zero_manufacturers
548
+
549
+ # values = manufacturers_to_show['units'].tolist()
550
+ # amounts = manufacturers_to_show['sales'].tolist()
551
+ # manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
552
+
553
+ # st.write(f"### Results for top {len(manufacturers)} manufacturers:")
554
+ # for manufacturer, value, amount in zip(manufacturers, values, amounts):
555
+ # st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
556
+
557
+ # if manufacturers:
558
+ # fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
559
+ # st.pyplot(fig)
560
+ # else:
561
+ # st.warning("No data available to create the radar chart.")
562
+
563
+ # # Show sales over the years graph
564
+ # sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
565
+ # if all(col in ventas_clientes.columns for col in sales_columns):
566
+ # years = ['2021', '2022', '2023']
567
+ # customer_sales = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code][sales_columns].values[0]
568
+
569
+ # fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
570
+ # fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
571
+ # st.plotly_chart(fig_sales)
572
+ # else:
573
+ # st.warning("Sales data for 2021-2023 not available.")
574
+ # else:
575
+ # st.warning(f"No prediction data found for customer {customer_code}.")
576
+ # else:
577
+ # st.warning(f"No data found for customer {customer_code}. Please check the code.")
578
+ # else:
579
+ # st.warning("Please select a customer.")