GMARTINEZMILLA committed on
Commit
0cf8b26
1 Parent(s): 13f3bbf

feat: updated app.py

Browse files
Files changed (1) hide show
  1. app.py +574 -334
app.py CHANGED
@@ -11,84 +11,59 @@ from sklearn.metrics import mean_absolute_error, mean_squared_error
11
  # Page configuration
12
  st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
13
 
14
- # Load CSV files at the top
15
  df = pd.read_csv("df_clean.csv")
16
  nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
17
  euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
18
  ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
19
- customer_clusters = pd.read_csv('predicts/customer_clusters.csv') # Load the customer clusters here
20
- df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
21
 
22
  # Ensure customer codes are strings
23
  df['CLIENTE'] = df['CLIENTE'].astype(str)
24
  nombres_proveedores['codigo'] = nombres_proveedores['codigo'].astype(str)
25
  euros_proveedor['CLIENTE'] = euros_proveedor['CLIENTE'].astype(str)
26
- customer_clusters['cliente_id'] = customer_clusters['cliente_id'].astype(str) # Ensure customer IDs are strings
27
  fieles_df = pd.read_csv("clientes_relevantes.csv")
28
  cestas = pd.read_csv("cestas.csv")
29
  productos = pd.read_csv("productos.csv")
30
  df_agg_2024['cliente_id'] = df_agg_2024['cliente_id'].astype(str)
31
 
32
- # Convert all columns except 'CLIENTE' to float in euros_proveedor
33
  for col in euros_proveedor.columns:
34
  if col != 'CLIENTE':
35
  euros_proveedor[col] = pd.to_numeric(euros_proveedor[col], errors='coerce')
36
 
37
- # Check for NaN values after conversion
38
  if euros_proveedor.isna().any().any():
39
  st.warning("Some values in euros_proveedor couldn't be converted to numbers. Please review the input data.")
40
 
41
- # Ignore the last two columns of df
42
  df = df.iloc[:, :-2]
43
 
44
  # Function to get supplier name
45
  def get_supplier_name(code):
46
- code = str(code) # Ensure code is a string
47
  name = nombres_proveedores[nombres_proveedores['codigo'] == code]['nombre'].values
48
  return name[0] if len(name) > 0 else code
49
 
50
- # Function to create radar chart with square root transformation
51
  def radar_chart(categories, values, amounts, title):
52
- N = len(categories)
53
- angles = [n / float(N) * 2 * np.pi for n in range(N)]
54
- angles += angles[:1]
55
-
56
- fig, ax = plt.subplots(figsize=(12, 12), subplot_kw=dict(projection='polar'))
57
-
58
- # Apply square root transformation
59
- sqrt_values = np.sqrt(values)
60
- sqrt_amounts = np.sqrt(amounts)
61
-
62
- max_sqrt_value = max(sqrt_values)
63
- normalized_values = [v / max_sqrt_value for v in sqrt_values]
64
-
65
- # Adjust scaling for spend values
66
- max_sqrt_amount = max(sqrt_amounts)
67
- scaling_factor = 0.7 # Adjust this value to control how much the spend values are scaled up
68
- normalized_amounts = [min((a / max_sqrt_amount) * scaling_factor, 1.0) for a in sqrt_amounts]
69
-
70
- normalized_values += normalized_values[:1]
71
- ax.plot(angles, normalized_values, 'o-', linewidth=2, color='#FF69B4', label='% Units (sqrt)')
72
- ax.fill(angles, normalized_values, alpha=0.25, color='#FF69B4')
73
-
74
- normalized_amounts += normalized_amounts[:1]
75
- ax.plot(angles, normalized_amounts, 'o-', linewidth=2, color='#4B0082', label='% Spend (sqrt)')
76
- ax.fill(angles, normalized_amounts, alpha=0.25, color='#4B0082')
77
-
78
- ax.set_xticks(angles[:-1])
79
- ax.set_xticklabels(categories, size=8, wrap=True)
80
- ax.set_ylim(0, 1)
81
-
82
- circles = np.linspace(0, 1, 5)
83
- for circle in circles:
84
- ax.plot(angles, [circle]*len(angles), '--', color='gray', alpha=0.3, linewidth=0.5)
85
-
86
- ax.set_yticklabels([])
87
- ax.spines['polar'].set_visible(False)
88
-
89
- plt.title(title, size=16, y=1.1)
90
- plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
91
-
92
  return fig
93
 
94
  # Main page design
@@ -106,6 +81,7 @@ if page == "":
106
  st.markdown("## Welcome to the Customer Insights App")
107
  st.write("Use the dropdown menu to navigate between the different sections.")
108
 
 
109
  elif page == "Customer Analysis":
110
  st.title("Customer Analysis")
111
  st.markdown("Use the tools below to explore your customer data.")
@@ -120,368 +96,632 @@ elif page == "Customer Analysis":
120
 
121
  if st.button("Calcular"):
122
  if customer_code:
123
- # Find Customer's Cluster
124
  customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
125
 
126
  if not customer_match.empty:
127
  cluster = customer_match['cluster_id'].values[0]
128
  st.write(f"Customer {customer_code} belongs to cluster {cluster}")
129
 
130
- # Load the Corresponding Model
131
  model_path = f'models/modelo_cluster_{cluster}.txt'
132
  gbm = lgb.Booster(model_file=model_path)
133
  st.write(f"Loaded model for cluster {cluster}")
134
 
135
- # Inspect the model
136
- st.write("### Model Information:")
137
- st.write(f"Number of trees: {gbm.num_trees()}")
138
- st.write(f"Number of features: {gbm.num_feature()}")
139
- st.write("Feature names:")
140
- st.write(gbm.feature_name())
141
-
142
  # Load predict data for that cluster
143
  predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
144
-
145
- # Convert cliente_id to string
146
  predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
147
-
148
- st.write("### Predict Data DataFrame:")
149
- st.write(predict_data.head())
150
- st.write(f"Shape: {predict_data.shape}")
151
 
152
  # Filter for the specific customer
153
- customer_code_str = str(customer_code)
154
- customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]
155
-
156
- # Add debug statements
157
- st.write(f"Unique customer IDs in predict data: {predict_data['cliente_id'].unique()}")
158
- st.write(f"Customer code we're looking for: {customer_code_str}")
159
-
160
- st.write("### Customer Data:")
161
- st.write(customer_data.head())
162
- st.write(f"Shape: {customer_data.shape}")
163
 
164
  if not customer_data.empty:
165
- # Define features consistently with the training process
166
  lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
167
  features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
168
-
169
- # Prepare data for prediction
170
  X_predict = customer_data[features]
171
 
172
  # Convert categorical features to 'category' dtype
173
  categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
174
  for feature in categorical_features:
175
  X_predict[feature] = X_predict[feature].astype('category')
176
-
177
- st.write("### Features for Prediction:")
178
- st.write(X_predict.head())
179
- st.write(f"Shape: {X_predict.shape}")
180
- st.write("Data types:")
181
- st.write(X_predict.dtypes)
182
-
183
  # Make Prediction for the selected customer
184
  y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
185
- st.write("### Prediction Results:")
186
- st.write(f"Type of y_pred: {type(y_pred)}")
187
- st.write(f"Shape of y_pred: {y_pred.shape}")
188
- st.write("First few predictions:")
189
- st.write(y_pred[:5])
190
-
191
- # Reassemble the results
192
  results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
193
  results['ventas_predichas'] = y_pred
194
- st.write("### Results DataFrame:")
195
- st.write(results.head())
196
- st.write(f"Shape: {results.shape}")
197
-
198
- st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
199
 
200
- # Load actual data
201
- actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
202
- st.write("### Actual Sales DataFrame:")
203
- st.write(actual_sales.head())
204
- st.write(f"Shape: {actual_sales.shape}")
205
-
206
  if not actual_sales.empty:
207
- results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
208
- on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
209
- how='left')
 
 
210
  results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
211
  results['ventas_reales'].fillna(0, inplace=True)
212
- st.write("### Final Results DataFrame:")
213
- st.write(results.head())
214
- st.write(f"Shape: {results.shape}")
215
-
216
- # Calculate metrics only for non-null actual sales
217
  valid_results = results.dropna(subset=['ventas_reales'])
218
  if not valid_results.empty:
219
  mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
220
  mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
221
  rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
222
 
223
- st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
224
  st.write(f"MAE: {mae:.2f}")
225
  st.write(f"MAPE: {mape:.2f}%")
226
  st.write(f"RMSE: {rmse:.2f}")
227
 
228
- # Analysis of results
229
- threshold_good = 100 # You may want to adjust this threshold
230
- if mae < threshold_good:
231
- st.success(f"Customer {customer_code} is performing well based on the predictions.")
232
- else:
233
- st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
234
- else:
235
- st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
236
 
237
- st.write("### Debug Information for Radar Chart:")
238
- st.write(f"Shape of customer_data: {customer_data.shape}")
239
- st.write(f"Shape of euros_proveedor: {euros_proveedor.shape}")
 
240
 
241
- # Get percentage of units sold for each manufacturer
242
- customer_df = df[df["CLIENTE"] == str(customer_code)] # Get the customer data
243
- all_manufacturers = customer_df.iloc[:, 1:].T # Exclude CLIENTE column (manufacturers are in columns)
244
- all_manufacturers.index = all_manufacturers.index.astype(str)
245
 
246
- # Get total sales for each manufacturer from euros_proveedor
247
- customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
248
- sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
249
- sales_data.index = sales_data.index.astype(str)
 
 
250
 
251
- # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
252
- sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
 
 
 
 
 
 
 
 
 
253
 
254
- # Ensure all values are numeric
255
- sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
256
- all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
257
 
258
- # Sort manufacturers by percentage of units and get top 10
259
- top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
260
 
261
- # Sort manufacturers by total sales and get top 10
262
- top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
263
 
264
- # Combine top manufacturers from both lists and get up to 20 unique manufacturers
265
- combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
266
 
267
- # Filter out manufacturers that are not present in both datasets
268
- combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
269
 
270
- st.write(f"Number of combined top manufacturers: {len(combined_top)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
- if combined_top:
273
- # Create a DataFrame with combined data for these top manufacturers
274
- combined_data = pd.DataFrame({
275
- 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
276
- 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
277
- }).fillna(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
- # Sort by units, then by sales
280
- combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
281
 
282
- # Filter out manufacturers with 0 units
283
- non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
284
 
285
- # If we have less than 3 non-zero manufacturers, add some zero-value ones
286
- if len(non_zero_manufacturers) < 3:
287
- zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
288
- manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
289
- else:
290
- manufacturers_to_show = non_zero_manufacturers
291
 
292
- values = manufacturers_to_show['units'].tolist()
293
- amounts = manufacturers_to_show['sales'].tolist()
294
- manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
295
 
296
- st.write(f"### Results for top {len(manufacturers)} manufacturers:")
297
- for manufacturer, value, amount in zip(manufacturers, values, amounts):
298
- st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
 
 
 
299
 
300
- if manufacturers: # Only create the chart if we have data
301
- fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
302
- st.pyplot(fig)
303
- else:
304
- st.warning("No data available to create the radar chart.")
305
 
306
- else:
307
- st.warning("No combined top manufacturers found.")
308
 
309
- # Ensure codigo_cliente in ventas_clientes is a string
310
- ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()
 
 
 
 
311
 
312
- # Ensure customer_code is a string and strip any spaces
313
- customer_code = str(customer_code).strip()
 
314
 
315
- if customer_code in ventas_clientes['codigo_cliente'].unique():
316
- st.write(f"Customer {customer_code} found in ventas_clientes")
317
- else:
318
- st.write(f"Customer {customer_code} not found in ventas_clientes")
319
 
320
- # Customer sales 2021-2024 (if data exists)
321
- sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
322
- if all(col in ventas_clientes.columns for col in sales_columns):
323
- customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
- if not customer_sales_data.empty:
326
- customer_sales = customer_sales_data[sales_columns].values[0]
327
- years = ['2021', '2022', '2023']
328
 
329
- fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
330
- fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
331
- st.plotly_chart(fig_sales)
332
- else:
333
- st.warning(f"No historical sales data found for customer {customer_code}")
334
- else:
335
- st.warning("Sales data for 2021-2023 not available in the dataset.")
336
- else:
337
- st.warning(f"No data found for customer {customer_code}. Please check the code.")
338
- else:
339
- st.warning("Please select a customer.")
340
 
341
 
342
- # Customer Recommendations Page
343
- elif page == "Articles Recommendations":
344
- st.title("Articles Recommendations")
345
 
346
- st.markdown("""
347
- Get tailored recommendations for your customers based on their basket.
348
- """)
349
 
350
- # Campo input para cliente
351
- partial_code = st.text_input("Enter part of Customer Code for Recommendations (or leave empty to see all)")
352
- if partial_code:
353
- filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
354
- else:
355
- filtered_customers = df
356
- customer_list = filtered_customers['CLIENTE'].unique()
357
- customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
- # Definición de la función recomienda
360
- def recomienda(new_basket):
361
- # Calcular la matriz TF-IDF
362
- tfidf = TfidfVectorizer()
363
- tfidf_matrix = tfidf.fit_transform(cestas['Cestas'])
364
-
365
- # Convertir la nueva cesta en formato TF-IDF
366
- new_basket_str = ' '.join(new_basket)
367
- new_basket_tfidf = tfidf.transform([new_basket_str])
368
-
369
- # Comparar la nueva cesta con las anteriores
370
- similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
371
-
372
- # Obtener los índices de las cestas más similares
373
- similar_indices = similarities.argsort()[0][-3:] # Las 3 más similares
374
-
375
- # Crear un diccionario para contar las recomendaciones
376
- recommendations_count = {}
377
- total_similarity = 0
378
-
379
- # Recomendar productos de cestas similares
380
- for idx in similar_indices:
381
- sim_score = similarities[0][idx]
382
- total_similarity += sim_score
383
- products = cestas.iloc[idx]['Cestas'].split()
384
-
385
- for product in products:
386
- if product.strip() not in new_basket: # Evitar recomendar lo que ya está en la cesta
387
- if product.strip() in recommendations_count:
388
- recommendations_count[product.strip()] += sim_score
389
- else:
390
- recommendations_count[product.strip()] = sim_score
391
-
392
- # Calcular la probabilidad relativa de cada producto recomendado
393
- recommendations_with_prob = []
394
- if total_similarity > 0: # Verificar que total_similarity no sea cero
395
- recommendations_with_prob = [(product, score / total_similarity) for product, score in recommendations_count.items()]
396
- else:
397
- print("No se encontraron similitudes suficientes para calcular probabilidades.")
398
-
399
- recommendations_with_prob.sort(key=lambda x: x[1], reverse=True) # Ordenar por puntuación
400
-
401
- # Crear un nuevo DataFrame para almacenar las recomendaciones con descripciones y probabilidades
402
- recommendations_df = pd.DataFrame(columns=['ARTICULO', 'DESCRIPCION', 'PROBABILIDAD'])
403
-
404
- # Agregar las recomendaciones al DataFrame usando pd.concat
405
- for product, prob in recommendations_with_prob:
406
- # Buscar la descripción en el DataFrame de productos
407
- description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
408
- if not description.empty:
409
- # Crear un nuevo DataFrame temporal para la recomendación
410
- temp_df = pd.DataFrame({
411
- 'ARTICULO': [product],
412
- 'DESCRIPCION': [description.values[0]], # Obtener el primer valor encontrado
413
- 'PROBABILIDAD': [prob]
414
- })
415
- # Concatenar el DataFrame temporal al DataFrame de recomendaciones
416
- recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)
417
-
418
- return recommendations_df
419
-
420
- # Comprobar si el cliente está en el CSV de fieles
421
- is_fiel = customer_code in fieles_df['Cliente'].astype(str).values
 
 
 
 
 
422
 
423
- if customer_code:
424
- if is_fiel:
425
- st.write(f"### Customer {customer_code} is a loyal customer.")
426
- option = st.selectbox("Select Recommendation Type", ["Select an option", "By Purchase History", "By Current Basket"])
427
-
428
- if option == "By Purchase History":
429
- st.warning("Option not available... aún")
430
- elif option == "By Current Basket":
431
- st.write("Select the items and assign quantities for the basket:")
432
-
433
- # Mostrar lista de artículos disponibles
434
- available_articles = productos['ARTICULO'].unique()
435
- selected_articles = st.multiselect("Select Articles", available_articles)
436
-
437
- # Crear inputs para ingresar las cantidades de cada artículo seleccionado
438
- quantities = {}
439
- for article in selected_articles:
440
- quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
441
-
442
- if st.button("Calcular"): # Añadimos el botón "Calcular"
443
- # Crear una lista de artículos basada en la selección
444
- new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
445
-
446
- if new_basket:
447
- # Procesar la lista para recomendar
448
- recommendations_df = recomienda(new_basket)
449
-
450
- if not recommendations_df.empty:
451
- st.write("### Recommendations based on the current basket:")
452
- st.dataframe(recommendations_df)
453
- else:
454
- st.warning("No recommendations found for the provided basket.")
455
- else:
456
- st.warning("Please select at least one article and set its quantity.")
457
- else:
458
- st.write(f"### Customer {customer_code} is not a loyal customer.")
459
- st.write("Select items and assign quantities for the basket:")
460
-
461
- # Mostrar lista de artículos disponibles
462
- available_articles = productos['ARTICULO'].unique()
463
- selected_articles = st.multiselect("Select Articles", available_articles)
464
 
465
- # Crear inputs para ingresar las cantidades de cada artículo seleccionado
466
- quantities = {}
467
- for article in selected_articles:
468
- quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
469
 
470
- if st.button("Calcular"): # Añadimos el botón "Calcular"
471
- # Crear una lista de artículos basada en la selección
472
- new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
473
 
474
- if new_basket:
475
- # Procesar la lista para recomendar
476
- recommendations_df = recomienda(new_basket)
477
 
478
- if not recommendations_df.empty:
479
- st.write("### Recommendations based on the current basket:")
480
- st.dataframe(recommendations_df)
481
- else:
482
- st.warning("No recommendations found for the provided basket.")
483
- else:
484
- st.warning("Please select at least one article and set its quantity.")
485
 
486
 
487
  # Customer Analysis Page
 
11
  # Page configuration
12
  st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
13
 
14
+ # Load CSV files
15
  df = pd.read_csv("df_clean.csv")
16
  nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
17
  euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
18
  ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
19
+ customer_clusters = pd.read_csv('predicts/customer_clusters.csv')
20
+ df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
21
 
22
  # Ensure customer codes are strings
23
  df['CLIENTE'] = df['CLIENTE'].astype(str)
24
  nombres_proveedores['codigo'] = nombres_proveedores['codigo'].astype(str)
25
  euros_proveedor['CLIENTE'] = euros_proveedor['CLIENTE'].astype(str)
26
+ customer_clusters['cliente_id'] = customer_clusters['cliente_id'].astype(str)
27
  fieles_df = pd.read_csv("clientes_relevantes.csv")
28
  cestas = pd.read_csv("cestas.csv")
29
  productos = pd.read_csv("productos.csv")
30
  df_agg_2024['cliente_id'] = df_agg_2024['cliente_id'].astype(str)
31
 
32
+ # Convert columns in euros_proveedor to numeric
33
  for col in euros_proveedor.columns:
34
  if col != 'CLIENTE':
35
  euros_proveedor[col] = pd.to_numeric(euros_proveedor[col], errors='coerce')
36
 
37
+ # Check for NaN values in euros_proveedor
38
  if euros_proveedor.isna().any().any():
39
  st.warning("Some values in euros_proveedor couldn't be converted to numbers. Please review the input data.")
40
 
41
+ # Ignore the last two columns in df
42
  df = df.iloc[:, :-2]
43
 
44
  # Function to get supplier name
45
  def get_supplier_name(code):
46
+ code = str(code)
47
  name = nombres_proveedores[nombres_proveedores['codigo'] == code]['nombre'].values
48
  return name[0] if len(name) > 0 else code
49
 
50
+ # Function to create radar chart using Plotly
51
  def radar_chart(categories, values, amounts, title):
52
+ fig = px.line_polar(
53
+ r=values,
54
+ theta=categories,
55
+ line_close=True,
56
+ labels={'r': '% Units Sold', 'theta': 'Manufacturers'},
57
+ title=title,
58
+ )
59
+ fig.add_scatterpolar(
60
+ r=amounts,
61
+ theta=categories,
62
+ line_close=True,
63
+ name="Spend (€)",
64
+ mode="lines+markers"
65
+ )
66
+ fig.update_traces(fill='toself')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  return fig
68
 
69
  # Main page design
 
81
  st.markdown("## Welcome to the Customer Insights App")
82
  st.write("Use the dropdown menu to navigate between the different sections.")
83
 
84
+ # Customer Analysis Page
85
  elif page == "Customer Analysis":
86
  st.title("Customer Analysis")
87
  st.markdown("Use the tools below to explore your customer data.")
 
96
 
97
  if st.button("Calcular"):
98
  if customer_code:
 
99
  customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
100
 
101
  if not customer_match.empty:
102
  cluster = customer_match['cluster_id'].values[0]
103
  st.write(f"Customer {customer_code} belongs to cluster {cluster}")
104
 
105
+ # Load the corresponding model
106
  model_path = f'models/modelo_cluster_{cluster}.txt'
107
  gbm = lgb.Booster(model_file=model_path)
108
  st.write(f"Loaded model for cluster {cluster}")
109
 
 
 
 
 
 
 
 
110
  # Load predict data for that cluster
111
  predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
 
 
112
  predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
 
 
 
 
113
 
114
  # Filter for the specific customer
115
+ customer_data = predict_data[predict_data['cliente_id'] == customer_code]
 
 
 
 
 
 
 
 
 
116
 
117
  if not customer_data.empty:
 
118
  lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
119
  features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
 
 
120
  X_predict = customer_data[features]
121
 
122
  # Convert categorical features to 'category' dtype
123
  categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
124
  for feature in categorical_features:
125
  X_predict[feature] = X_predict[feature].astype('category')
126
+
 
 
 
 
 
 
127
  # Make Prediction for the selected customer
128
  y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
129
+
130
+ # Results DataFrame
 
 
 
 
 
131
  results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
132
  results['ventas_predichas'] = y_pred
 
 
 
 
 
133
 
134
+ # Load actual sales data
135
+ actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
 
 
 
 
136
  if not actual_sales.empty:
137
+ results = results.merge(
138
+ actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
139
+ on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
140
+ how='left'
141
+ )
142
  results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
143
  results['ventas_reales'].fillna(0, inplace=True)
144
+
145
+ # Calculate error metrics
 
 
 
146
  valid_results = results.dropna(subset=['ventas_reales'])
147
  if not valid_results.empty:
148
  mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
149
  mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
150
  rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
151
 
 
152
  st.write(f"MAE: {mae:.2f}")
153
  st.write(f"MAPE: {mape:.2f}%")
154
  st.write(f"RMSE: {rmse:.2f}")
155
 
156
+ # Plot radar chart
157
+ top_units = df[df["CLIENTE"] == str(customer_code)].iloc[:, 1:].T
158
+ top_sales = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)].iloc[:, 1:].T
159
+
160
+ combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
 
 
 
161
 
162
+ combined_data = pd.DataFrame({
163
+ 'units': top_units.loc[combined_top, top_units.columns[0]],
164
+ 'sales': top_sales.loc[combined_top, top_sales.columns[0]]
165
+ }).fillna(0)
166
 
167
+ manufacturers = [get_supplier_name(m) for m in combined_data.index]
168
+ values = combined_data['units'].tolist()
169
+ amounts = combined_data['sales'].tolist()
 
170
 
171
+ fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Customer {customer_code}')
172
+ st.plotly_chart(fig)
173
+
174
+ # Articles Recommendations Page
175
+ elif page == "Articles Recommendations":
176
+ st.title("Articles Recommendations")
177
 
178
+ st.markdown("""
179
+ Get tailored recommendations for your customers based on their basket.
180
+ """)
181
+
182
+ partial_code = st.text_input("Enter part of Customer Code for Recommendations (or leave empty to see all)")
183
+ if partial_code:
184
+ filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
185
+ else:
186
+ filtered_customers = df
187
+ customer_list = filtered_customers['CLIENTE'].unique()
188
+ customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
189
 
190
+ if customer_code:
191
+ option = st.selectbox("Select Recommendation Type", ["Select an option", "By Purchase History", "By Current Basket"])
 
192
 
193
+ if option == "By Current Basket":
194
+ st.write("Select the items and assign quantities for the basket:")
195
 
196
+ available_articles = productos['ARTICULO'].unique()
197
+ selected_articles = st.multiselect("Select Articles", available_articles)
198
 
199
+ quantities = {article: st.number_input(f"Quantity for {article}", min_value=0, step=1) for article in selected_articles}
 
200
 
201
+ if st.button("Calcular"):
202
+ new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
203
 
204
+ if new_basket:
205
+ def recomienda(new_basket):
206
+ tfidf = TfidfVectorizer()
207
+ tfidf_matrix = tfidf.fit_transform(cestas['Cestas'])
208
+ new_basket_tfidf = tfidf.transform([' '.join(new_basket)])
209
+ similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
210
+ similar_indices = similarities.argsort()[0][-3:]
211
+
212
+ recommendations_count = {}
213
+ total_similarity = 0
214
+
215
+ for idx in similar_indices:
216
+ sim_score = similarities[0][idx]
217
+ total_similarity += sim_score
218
+ products = cestas.iloc[idx]['Cestas'].split()
219
+
220
+ for product in products:
221
+ if product not in new_basket:
222
+ recommendations_count[product] = recommendations_count.get(product, 0) + sim_score
223
+
224
+ recommendations_with_prob = [(prod, score / total_similarity) for prod, score in recommendations_count.items()]
225
+ recommendations_with_prob.sort(key=lambda x: x[1], reverse=True)
226
+
227
+ recommendations_df = pd.DataFrame({
228
+ 'ARTICULO': [r[0] for r in recommendations_with_prob],
229
+ 'PROBABILIDAD': [r[1] for r in recommendations_with_prob]
230
+ })
231
+ return recommendations_df
232
 
233
+ recommendations_df = recomienda(new_basket)
234
+ st.dataframe(recommendations_df)
235
+ else:
236
+ st.warning("Please select at least one article and set its quantity.")
237
+
238
+
239
+
240
+ # import streamlit as st
241
+ # import pandas as pd
242
+ # import plotly.express as px
243
+ # import matplotlib.pyplot as plt
244
+ # import numpy as np
245
+ # import lightgbm as lgb
246
+ # from sklearn.feature_extraction.text import TfidfVectorizer
247
+ # from sklearn.metrics.pairwise import cosine_similarity
248
+ # from sklearn.metrics import mean_absolute_error, mean_squared_error
249
+
250
+ # # Page configuration
251
+ # st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
252
+
253
+ # # Load CSV files at the top
254
+ # df = pd.read_csv("df_clean.csv")
255
+ # nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
256
+ # euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
257
+ # ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
258
+ # customer_clusters = pd.read_csv('predicts/customer_clusters.csv') # Load the customer clusters here
259
+ # df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
260
+
261
+ # # Ensure customer codes are strings
262
+ # df['CLIENTE'] = df['CLIENTE'].astype(str)
263
+ # nombres_proveedores['codigo'] = nombres_proveedores['codigo'].astype(str)
264
+ # euros_proveedor['CLIENTE'] = euros_proveedor['CLIENTE'].astype(str)
265
+ # customer_clusters['cliente_id'] = customer_clusters['cliente_id'].astype(str) # Ensure customer IDs are strings
266
+ # fieles_df = pd.read_csv("clientes_relevantes.csv")
267
+ # cestas = pd.read_csv("cestas.csv")
268
+ # productos = pd.read_csv("productos.csv")
269
+ # df_agg_2024['cliente_id'] = df_agg_2024['cliente_id'].astype(str)
270
+
271
+ # # Convert all columns except 'CLIENTE' to float in euros_proveedor
272
+ # for col in euros_proveedor.columns:
273
+ # if col != 'CLIENTE':
274
+ # euros_proveedor[col] = pd.to_numeric(euros_proveedor[col], errors='coerce')
275
+
276
+ # # Check for NaN values after conversion
277
+ # if euros_proveedor.isna().any().any():
278
+ # st.warning("Some values in euros_proveedor couldn't be converted to numbers. Please review the input data.")
279
+
280
+ # # Ignore the last two columns of df
281
+ # df = df.iloc[:, :-2]
282
+
283
+ # # Function to get supplier name
284
+ # def get_supplier_name(code):
285
+ # code = str(code) # Ensure code is a string
286
+ # name = nombres_proveedores[nombres_proveedores['codigo'] == code]['nombre'].values
287
+ # return name[0] if len(name) > 0 else code
288
+
289
+ # # Function to create radar chart with square root transformation
290
+ # def radar_chart(categories, values, amounts, title):
291
+ # N = len(categories)
292
+ # angles = [n / float(N) * 2 * np.pi for n in range(N)]
293
+ # angles += angles[:1]
294
+
295
+ # fig, ax = plt.subplots(figsize=(12, 12), subplot_kw=dict(projection='polar'))
296
+
297
+ # # Apply square root transformation
298
+ # sqrt_values = np.sqrt(values)
299
+ # sqrt_amounts = np.sqrt(amounts)
300
+
301
+ # max_sqrt_value = max(sqrt_values)
302
+ # normalized_values = [v / max_sqrt_value for v in sqrt_values]
303
+
304
+ # # Adjust scaling for spend values
305
+ # max_sqrt_amount = max(sqrt_amounts)
306
+ # scaling_factor = 0.7 # Adjust this value to control how much the spend values are scaled up
307
+ # normalized_amounts = [min((a / max_sqrt_amount) * scaling_factor, 1.0) for a in sqrt_amounts]
308
+
309
+ # normalized_values += normalized_values[:1]
310
+ # ax.plot(angles, normalized_values, 'o-', linewidth=2, color='#FF69B4', label='% Units (sqrt)')
311
+ # ax.fill(angles, normalized_values, alpha=0.25, color='#FF69B4')
312
+
313
+ # normalized_amounts += normalized_amounts[:1]
314
+ # ax.plot(angles, normalized_amounts, 'o-', linewidth=2, color='#4B0082', label='% Spend (sqrt)')
315
+ # ax.fill(angles, normalized_amounts, alpha=0.25, color='#4B0082')
316
+
317
+ # ax.set_xticks(angles[:-1])
318
+ # ax.set_xticklabels(categories, size=8, wrap=True)
319
+ # ax.set_ylim(0, 1)
320
+
321
+ # circles = np.linspace(0, 1, 5)
322
+ # for circle in circles:
323
+ # ax.plot(angles, [circle]*len(angles), '--', color='gray', alpha=0.3, linewidth=0.5)
324
+
325
+ # ax.set_yticklabels([])
326
+ # ax.spines['polar'].set_visible(False)
327
+
328
+ # plt.title(title, size=16, y=1.1)
329
+ # plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
330
+
331
+ # return fig
332
+
333
+ # # Main page design
334
+ # st.title("Welcome to Customer Insights App")
335
+ # st.markdown("""
336
+ # This app helps businesses analyze customer behaviors and provide personalized recommendations based on purchase history.
337
+ # Use the tools below to dive deeper into your customer data.
338
+ # """)
339
+
340
+ # # Navigation menu
341
+ # page = st.selectbox("Select the tool you want to use", ["", "Customer Analysis", "Articles Recommendations"])
342
+
343
+ # # Home Page
344
+ # if page == "":
345
+ # st.markdown("## Welcome to the Customer Insights App")
346
+ # st.write("Use the dropdown menu to navigate between the different sections.")
347
+
348
+ # # Customer Analysis Page
349
+ # elif page == "Customer Analysis":
350
+ # st.title("Customer Analysis")
351
+ # st.markdown("Use the tools below to explore your customer data.")
352
+
353
+ # partial_code = st.text_input("Enter part of Customer Code (or leave empty to see all)")
354
+ # if partial_code:
355
+ # filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
356
+ # else:
357
+ # filtered_customers = df
358
+ # customer_list = filtered_customers['CLIENTE'].unique()
359
+ # customer_code = st.selectbox("Select Customer Code", customer_list)
360
+
361
+ # if st.button("Calcular"):
362
+ # if customer_code:
363
+ # # Find Customer's Cluster
364
+ # customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
365
+
366
+ # if not customer_match.empty:
367
+ # cluster = customer_match['cluster_id'].values[0]
368
+ # st.write(f"Customer {customer_code} belongs to cluster {cluster}")
369
+
370
+ # # Load the Corresponding Model
371
+ # model_path = f'models/modelo_cluster_{cluster}.txt'
372
+ # gbm = lgb.Booster(model_file=model_path)
373
+ # st.write(f"Loaded model for cluster {cluster}")
374
+
375
+ # # Inspect the model
376
+ # st.write("### Model Information:")
377
+ # st.write(f"Number of trees: {gbm.num_trees()}")
378
+ # st.write(f"Number of features: {gbm.num_feature()}")
379
+ # st.write("Feature names:")
380
+ # st.write(gbm.feature_name())
381
+
382
+ # # Load predict data for that cluster
383
+ # predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
384
+
385
+ # # Convert cliente_id to string
386
+ # predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
387
+
388
+ # st.write("### Predict Data DataFrame:")
389
+ # st.write(predict_data.head())
390
+ # st.write(f"Shape: {predict_data.shape}")
391
+
392
+ # # Filter for the specific customer
393
+ # customer_code_str = str(customer_code)
394
+ # customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]
395
+
396
+ # # Add debug statements
397
+ # st.write(f"Unique customer IDs in predict data: {predict_data['cliente_id'].unique()}")
398
+ # st.write(f"Customer code we're looking for: {customer_code_str}")
399
+
400
+ # st.write("### Customer Data:")
401
+ # st.write(customer_data.head())
402
+ # st.write(f"Shape: {customer_data.shape}")
403
+
404
+ # if not customer_data.empty:
405
+ # # Define features consistently with the training process
406
+ # lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
407
+ # features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
408
+
409
+ # # Prepare data for prediction
410
+ # X_predict = customer_data[features]
411
+
412
+ # # Convert categorical features to 'category' dtype
413
+ # categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
414
+ # for feature in categorical_features:
415
+ # X_predict[feature] = X_predict[feature].astype('category')
416
+
417
+ # st.write("### Features for Prediction:")
418
+ # st.write(X_predict.head())
419
+ # st.write(f"Shape: {X_predict.shape}")
420
+ # st.write("Data types:")
421
+ # st.write(X_predict.dtypes)
422
+
423
+ # # Make Prediction for the selected customer
424
+ # y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
425
+ # st.write("### Prediction Results:")
426
+ # st.write(f"Type of y_pred: {type(y_pred)}")
427
+ # st.write(f"Shape of y_pred: {y_pred.shape}")
428
+ # st.write("First few predictions:")
429
+ # st.write(y_pred[:5])
430
+
431
+ # # Reassemble the results
432
+ # results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
433
+ # results['ventas_predichas'] = y_pred
434
+ # st.write("### Results DataFrame:")
435
+ # st.write(results.head())
436
+ # st.write(f"Shape: {results.shape}")
437
+
438
+ # st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
439
+
440
+ # # Load actual data
441
+ # actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
442
+ # st.write("### Actual Sales DataFrame:")
443
+ # st.write(actual_sales.head())
444
+ # st.write(f"Shape: {actual_sales.shape}")
445
+
446
+ # if not actual_sales.empty:
447
+ # results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
448
+ # on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
449
+ # how='left')
450
+ # results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
451
+ # results['ventas_reales'].fillna(0, inplace=True)
452
+ # st.write("### Final Results DataFrame:")
453
+ # st.write(results.head())
454
+ # st.write(f"Shape: {results.shape}")
455
+
456
+ # # Calculate metrics only for non-null actual sales
457
+ # valid_results = results.dropna(subset=['ventas_reales'])
458
+ # if not valid_results.empty:
459
+ # mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
460
+ # mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
461
+ # rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
462
+
463
+ # st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
464
+ # st.write(f"MAE: {mae:.2f}")
465
+ # st.write(f"MAPE: {mape:.2f}%")
466
+ # st.write(f"RMSE: {rmse:.2f}")
467
+
468
+ # # Analysis of results
469
+ # threshold_good = 100 # You may want to adjust this threshold
470
+ # if mae < threshold_good:
471
+ # st.success(f"Customer {customer_code} is performing well based on the predictions.")
472
+ # else:
473
+ # st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
474
+ # else:
475
+ # st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
476
+
477
+ # st.write("### Debug Information for Radar Chart:")
478
+ # st.write(f"Shape of customer_data: {customer_data.shape}")
479
+ # st.write(f"Shape of euros_proveedor: {euros_proveedor.shape}")
480
+
481
+ # # Get percentage of units sold for each manufacturer
482
+ # customer_df = df[df["CLIENTE"] == str(customer_code)] # Get the customer data
483
+ # all_manufacturers = customer_df.iloc[:, 1:].T # Exclude CLIENTE column (manufacturers are in columns)
484
+ # all_manufacturers.index = all_manufacturers.index.astype(str)
485
+
486
+ # # Get total sales for each manufacturer from euros_proveedor
487
+ # customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
488
+ # sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
489
+ # sales_data.index = sales_data.index.astype(str)
490
+
491
+ # # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
492
+ # sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
493
+
494
+ # # Ensure all values are numeric
495
+ # sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
496
+ # all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
497
+
498
+ # # Sort manufacturers by percentage of units and get top 10
499
+ # top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
500
 
501
+ # # Sort manufacturers by total sales and get top 10
502
+ # top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
503
 
504
+ # # Combine top manufacturers from both lists and get up to 20 unique manufacturers
505
+ # combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
506
 
507
+ # # Filter out manufacturers that are not present in both datasets
508
+ # combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
 
 
 
 
509
 
510
+ # st.write(f"Number of combined top manufacturers: {len(combined_top)}")
 
 
511
 
512
+ # if combined_top:
513
+ # # Create a DataFrame with combined data for these top manufacturers
514
+ # combined_data = pd.DataFrame({
515
+ # 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
516
+ # 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
517
+ # }).fillna(0)
518
 
519
+ # # Sort by units, then by sales
520
+ # combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
 
 
 
521
 
522
+ # # Filter out manufacturers with 0 units
523
+ # non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
524
 
525
+ # # If we have less than 3 non-zero manufacturers, add some zero-value ones
526
+ # if len(non_zero_manufacturers) < 3:
527
+ # zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
528
+ # manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
529
+ # else:
530
+ # manufacturers_to_show = non_zero_manufacturers
531
 
532
+ # values = manufacturers_to_show['units'].tolist()
533
+ # amounts = manufacturers_to_show['sales'].tolist()
534
+ # manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
535
 
536
+ # st.write(f"### Results for top {len(manufacturers)} manufacturers:")
537
+ # for manufacturer, value, amount in zip(manufacturers, values, amounts):
538
+ # st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
 
539
 
540
+ # if manufacturers: # Only create the chart if we have data
541
+ # fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
542
+ # st.pyplot(fig)
543
+ # else:
544
+ # st.warning("No data available to create the radar chart.")
545
+
546
+ # else:
547
+ # st.warning("No combined top manufacturers found.")
548
+
549
+ # # Ensure codigo_cliente in ventas_clientes is a string
550
+ # ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()
551
+
552
+ # # Ensure customer_code is a string and strip any spaces
553
+ # customer_code = str(customer_code).strip()
554
+
555
+ # if customer_code in ventas_clientes['codigo_cliente'].unique():
556
+ # st.write(f"Customer {customer_code} found in ventas_clientes")
557
+ # else:
558
+ # st.write(f"Customer {customer_code} not found in ventas_clientes")
559
+
560
+ # # Customer sales 2021-2023 (if data exists)
561
+ # sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
562
+ # if all(col in ventas_clientes.columns for col in sales_columns):
563
+ # customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
564
 
565
+ # if not customer_sales_data.empty:
566
+ # customer_sales = customer_sales_data[sales_columns].values[0]
567
+ # years = ['2021', '2022', '2023']
568
 
569
+ # fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
570
+ # fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
571
+ # st.plotly_chart(fig_sales)
572
+ # else:
573
+ # st.warning(f"No historical sales data found for customer {customer_code}")
574
+ # else:
575
+ # st.warning("Sales data for 2021-2023 not available in the dataset.")
576
+ # else:
577
+ # st.warning(f"No data found for customer {customer_code}. Please check the code.")
578
+ # else:
579
+ # st.warning("Please select a customer.")
580
 
581
 
582
+ # # Articles Recommendations Page
583
+ # elif page == "Articles Recommendations":
584
+ # st.title("Articles Recommendations")
585
 
586
+ # st.markdown("""
587
+ # Get tailored recommendations for your customers based on their basket.
588
+ # """)
589
 
590
+ # # Input field for the customer code
591
+ # partial_code = st.text_input("Enter part of Customer Code for Recommendations (or leave empty to see all)")
592
+ # if partial_code:
593
+ # filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
594
+ # else:
595
+ # filtered_customers = df
596
+ # customer_list = filtered_customers['CLIENTE'].unique()
597
+ # customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
598
+
599
+ # # Definition of the recomienda function
600
+ # def recomienda(new_basket):
601
+ # # Compute the TF-IDF matrix
602
+ # tfidf = TfidfVectorizer()
603
+ # tfidf_matrix = tfidf.fit_transform(cestas['Cestas'])
604
+
605
+ # # Convert the new basket to TF-IDF format
606
+ # new_basket_str = ' '.join(new_basket)
607
+ # new_basket_tfidf = tfidf.transform([new_basket_str])
608
+
609
+ # # Compare the new basket with the previous ones
610
+ # similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
611
+
612
+ # # Get the indices of the most similar baskets
613
+ # similar_indices = similarities.argsort()[0][-3:] # Las 3 más similares
614
+
615
+ # # Create a dictionary to accumulate the recommendation scores
616
+ # recommendations_count = {}
617
+ # total_similarity = 0
618
+
619
+ # # Recommend products from similar baskets
620
+ # for idx in similar_indices:
621
+ # sim_score = similarities[0][idx]
622
+ # total_similarity += sim_score
623
+ # products = cestas.iloc[idx]['Cestas'].split()
624
+
625
+ # for product in products:
626
+ # if product.strip() not in new_basket: # Evitar recomendar lo que ya está en la cesta
627
+ # if product.strip() in recommendations_count:
628
+ # recommendations_count[product.strip()] += sim_score
629
+ # else:
630
+ # recommendations_count[product.strip()] = sim_score
631
 
632
+ # # Compute the relative probability of each recommended product
633
+ # recommendations_with_prob = []
634
+ # if total_similarity > 0: # Verificar que total_similarity no sea cero
635
+ # recommendations_with_prob = [(product, score / total_similarity) for product, score in recommendations_count.items()]
636
+ # else:
637
+ # print("No se encontraron similitudes suficientes para calcular probabilidades.")
638
+
639
+ # recommendations_with_prob.sort(key=lambda x: x[1], reverse=True) # Ordenar por puntuación
640
+
641
+ # # Create a new DataFrame to store the recommendations with descriptions and probabilities
642
+ # recommendations_df = pd.DataFrame(columns=['ARTICULO', 'DESCRIPCION', 'PROBABILIDAD'])
643
+
644
+ # # Append the recommendations to the DataFrame using pd.concat
645
+ # for product, prob in recommendations_with_prob:
646
+ # # Look up the description in the productos DataFrame
647
+ # description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
648
+ # if not description.empty:
649
+ # # Create a temporary DataFrame for this recommendation
650
+ # temp_df = pd.DataFrame({
651
+ # 'ARTICULO': [product],
652
+ # 'DESCRIPCION': [description.values[0]], # Obtener el primer valor encontrado
653
+ # 'PROBABILIDAD': [prob]
654
+ # })
655
+ # # Concatenate the temporary DataFrame onto the recommendations DataFrame
656
+ # recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)
657
+
658
+ # return recommendations_df
659
+
660
+ # # Check whether the customer appears in the loyal-customers CSV
661
+ # is_fiel = customer_code in fieles_df['Cliente'].astype(str).values
662
+
663
+ # if customer_code:
664
+ # if is_fiel:
665
+ # st.write(f"### Customer {customer_code} is a loyal customer.")
666
+ # option = st.selectbox("Select Recommendation Type", ["Select an option", "By Purchase History", "By Current Basket"])
667
+
668
+ # if option == "By Purchase History":
669
+ # st.warning("Option not available... aún")
670
+ # elif option == "By Current Basket":
671
+ # st.write("Select the items and assign quantities for the basket:")
672
+
673
+ # # Show the list of available articles
674
+ # available_articles = productos['ARTICULO'].unique()
675
+ # selected_articles = st.multiselect("Select Articles", available_articles)
676
+
677
+ # # Create inputs to enter the quantity of each selected article
678
+ # quantities = {}
679
+ # for article in selected_articles:
680
+ # quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
681
+
682
+ # if st.button("Calcular"): # Añadimos el botón "Calcular"
683
+ # # Build the list of articles from the selection
684
+ # new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
685
+
686
+ # if new_basket:
687
+ # # Process the list to produce recommendations
688
+ # recommendations_df = recomienda(new_basket)
689
+
690
+ # if not recommendations_df.empty:
691
+ # st.write("### Recommendations based on the current basket:")
692
+ # st.dataframe(recommendations_df)
693
+ # else:
694
+ # st.warning("No recommendations found for the provided basket.")
695
+ # else:
696
+ # st.warning("Please select at least one article and set its quantity.")
697
+ # else:
698
+ # st.write(f"### Customer {customer_code} is not a loyal customer.")
699
+ # st.write("Select items and assign quantities for the basket:")
700
 
701
+ # # Show the list of available articles
702
+ # available_articles = productos['ARTICULO'].unique()
703
+ # selected_articles = st.multiselect("Select Articles", available_articles)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
704
 
705
+ # # Create inputs to enter the quantity of each selected article
706
+ # quantities = {}
707
+ # for article in selected_articles:
708
+ # quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
709
 
710
+ # if st.button("Calcular"): # Añadimos el botón "Calcular"
711
+ # # Build the list of articles from the selection
712
+ # new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
713
 
714
+ # if new_basket:
715
+ # # Process the list to produce recommendations
716
+ # recommendations_df = recomienda(new_basket)
717
 
718
+ # if not recommendations_df.empty:
719
+ # st.write("### Recommendations based on the current basket:")
720
+ # st.dataframe(recommendations_df)
721
+ # else:
722
+ # st.warning("No recommendations found for the provided basket.")
723
+ # else:
724
+ # st.warning("Please select at least one article and set its quantity.")
725
 
726
 
727
  # Customer Analysis Page