GMARTINEZMILLA committed on
Commit
4f71e96
1 Parent(s): 1c4696d

feat: add filtering by manufacturer (work in progress, not complete)

Browse files
Files changed (1) hide show
  1. app.py +380 -372
app.py CHANGED
@@ -326,395 +326,403 @@ elif page == "🕵️ Análisis de Cliente":
326
 
327
  if not customer_match.empty:
328
  cluster = customer_match['cluster_id'].values[0]
329
-
330
- with st.spinner(f"Seleccionando el modelo predictivo..."):
331
- # Load the Corresponding Model
332
- model_path = f'models/modelo_cluster_{cluster}.txt'
333
- gbm = lgb.Booster(model_file=model_path)
334
-
335
- with st.spinner("Preparando los datos..."):
336
- # Load predict data for that cluster
337
- predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
338
-
339
- # Convert cliente_id to string
340
- predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
341
-
342
- with st.spinner("Filtrando data..."):
343
- # Filter for the specific customer
344
- customer_code_str = str(customer_code)
345
- customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]
346
-
347
- with st.spinner("Geneerando predicciones de venta..."):
348
- if not customer_data.empty:
349
- # Define features consistently with the training process
350
- lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
351
- features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
352
-
353
- # Prepare data for prediction
354
- X_predict = customer_data[features]
355
-
356
- # Convert categorical features to 'category' dtype
357
- categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
358
- for feature in categorical_features:
359
- X_predict[feature] = X_predict[feature].astype('category')
360
 
361
- # Make Prediction for the selected customer
362
- y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
363
-
364
- # Reassemble the results
365
- results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
366
- results['ventas_predichas'] = y_pred
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
- # Load actual data from df_agg_2024
369
- actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
370
-
371
- if not actual_sales.empty:
372
- # Merge predictions with actual sales
373
- results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
374
- on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
375
- how='left')
376
- results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
377
- else:
378
- # If no actual sales data for 2024, fill 'ventas_reales' with 0
379
- results['ventas_reales'] = 0
380
-
381
- # Ensure any missing sales data is filled with 0
382
- results['ventas_reales'].fillna(0, inplace=True)
383
-
384
- # Define the cutoff date for the last 12 months
385
- fecha_inicio = pd.to_datetime("2023-01-01")
386
- fecha_corte = pd.to_datetime("2024-09-01")
387
-
388
- # Convertir fecha_mes a datetime en el DataFrame historical_data
389
- historical_data['fecha_mes'] = pd.to_datetime(historical_data['fecha_mes'], errors='coerce')
390
-
391
- # Ensure cliente_id is of type string and strip any leading/trailing whitespace
392
- historical_data['cliente_id'] = historical_data['cliente_id'].astype(str).str.strip()
393
- customer_code_str = str(customer_code).strip() # Ensure the customer code is also properly formatted
394
-
395
- filtered_historical_data = historical_data[historical_data['cliente_id'] == customer_code_str]
396
-
397
-
398
- # Filtrar los datos históricos por cliente y por el rango de fechas (2023)
399
- fecha_inicio_2023 = pd.to_datetime("2023-01-01")
400
- fecha_fin_2023 = pd.to_datetime("2023-12-31")
401
-
402
- datos_historicos = historical_data[
403
- (historical_data['cliente_id'] == customer_code_str) &
404
- (historical_data['fecha_mes'] >= fecha_inicio_2023) &
405
- (historical_data['fecha_mes'] <= fecha_fin_2023)
406
- ].groupby('fecha_mes')['precio_total'].sum().reset_index()
407
-
408
- # Renombrar la columna 'precio_total' a 'ventas_historicas' si no está vacía
409
- if not datos_historicos.empty:
410
- datos_historicos.rename(columns={'precio_total': 'ventas_historicas'}, inplace=True)
411
- else:
412
- # Si los datos históricos están vacíos, generar fechas de 2023 con ventas_historicas = 0
413
- fechas_2023 = pd.date_range(start='2023-01-01', end='2023-12-31', freq='M')
414
- datos_historicos = pd.DataFrame({'fecha_mes': fechas_2023, 'ventas_historicas': [0] * len(fechas_2023)})
415
-
416
- # Filtrar los datos de predicciones y ventas reales para 2024
417
- datos_cliente_total = results.groupby('fecha_mes').agg({
418
- 'ventas_reales': 'sum',
419
- 'ventas_predichas': 'sum'
420
- }).reset_index()
421
-
422
- # Asegurarnos de que fecha_mes en datos_cliente_total es datetime
423
- datos_cliente_total['fecha_mes'] = pd.to_datetime(datos_cliente_total['fecha_mes'], errors='coerce')
424
-
425
- # Generar un rango de fechas para 2024 si no hay predicciones
426
- fechas_2024 = pd.date_range(start='2024-01-01', end='2024-12-31', freq='M')
427
- fechas_df_2024 = pd.DataFrame({'fecha_mes': fechas_2024})
428
-
429
- # Asegurarnos de que fecha_mes en fechas_df_2024 es datetime
430
- fechas_df_2024['fecha_mes'] = pd.to_datetime(fechas_df_2024['fecha_mes'], errors='coerce')
431
-
432
- # Combinar datos históricos con predicciones y ventas reales usando un merge
433
- # Usamos how='outer' para asegurarnos de incluir todas las fechas de 2023 y 2024
434
- datos_combinados = pd.merge(datos_historicos, datos_cliente_total, on='fecha_mes', how='outer').sort_values('fecha_mes')
435
-
436
- # Rellenar los NaN: 0 en ventas_historicas donde faltan predicciones, y viceversa
437
- datos_combinados['ventas_historicas'].fillna(0, inplace=True)
438
- datos_combinados['ventas_predichas'].fillna(0, inplace=True)
439
- datos_combinados['ventas_reales'].fillna(0, inplace=True)
440
-
441
- # Crear la gráfica con Plotly
442
- fig = go.Figure()
443
-
444
- # Graficar ventas históricas
445
- fig.add_trace(go.Scatter(
446
- x=datos_combinados['fecha_mes'],
447
- y=datos_combinados['ventas_historicas'],
448
- mode='lines+markers',
449
- name='Ventas Históricas',
450
- line=dict(color='blue')
451
- ))
452
-
453
- # Graficar ventas predichas
454
- fig.add_trace(go.Scatter(
455
- x=datos_combinados['fecha_mes'],
456
- y=datos_combinados['ventas_predichas'],
457
- mode='lines+markers',
458
- name='Ventas Predichas',
459
- line=dict(color='orange')
460
- ))
461
-
462
- # Graficar ventas reales
463
- fig.add_trace(go.Scatter(
464
- x=datos_combinados['fecha_mes'],
465
- y=datos_combinados['ventas_reales'],
466
- mode='lines+markers',
467
- name='Ventas Reales',
468
- line=dict(color='green')
469
- ))
470
-
471
- # Personalizar el layout para enfocarse en 2023 y 2024
472
- fig.update_layout(
473
- title=f"Ventas Históricas, Predichas y Reales para Cliente {customer_code}",
474
- xaxis_title="Fecha",
475
- yaxis_title="Ventas (€)",
476
- height=600,
477
- xaxis_range=[fecha_inicio_2023, pd.to_datetime("2024-09-30")], # Ajustar el rango del eje x a 2023-2024
478
- legend_title="Tipo de Ventas",
479
- hovermode="x unified"
480
- )
481
 
482
- # Mostrar la gráfica en Streamlit
483
- st.plotly_chart(fig)
 
484
 
485
- # Calculate metrics for 2024 data
486
- datos_2024 = datos_combinados[datos_combinados['fecha_mes'].dt.year == 2024]
487
- actual = datos_2024['ventas_reales']
488
- predicted = datos_2024['ventas_predichas']
489
 
490
- def calculate_mape(y_true, y_pred):
491
- mask = y_true != 0
492
- return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100
493
 
494
- mae = mean_absolute_error(actual, predicted)
495
- mse = mean_squared_error(actual, predicted)
496
- rmse = np.sqrt(mse)
497
- mape = calculate_mape(actual, predicted)
498
- smape = np.mean(2 * np.abs(actual - predicted) / (np.abs(actual) + np.abs(predicted))) * 100
499
 
500
- # Display metrics
501
- st.subheader("Métricas de Predicción (2024)")
502
- col1, col2, col3, col4 = st.columns(4)
503
- col1.metric("MAE", f"{mae:.2f} €",help="Promedio de la diferencia absoluta entre las predicciones y los valores reales.")
504
- col2.metric("MAPE", f"{mape:.2f}%",help="Porcentaje promedio de error en las predicciones.")
505
- col3.metric("RMSE", f"{rmse:.2f} €",help="Medida de la desviación estándar de los residuos de predicción.")
506
- col4.metric("SMAPE", f"{smape:.2f}%",help="Alternativa al MAPE que maneja mejor los valores cercanos a cero.")
507
 
 
 
 
508
 
509
- # Split space into two columns
510
- col1, col2 = st.columns(2)
 
 
 
511
 
512
- # Column 1: Radar chart for top manufacturers
513
- with col1:
514
- st.subheader("¡Esto tiene buena pinta!")
515
- st.info("Su cliente ha superado las ventas predichas de las siguientes marcas:")
 
 
 
516
 
517
- # Group results by manufacturer to calculate the total predicted and actual sales
518
- grouped_results = results.groupby('marca_id_encoded').agg({
519
  'ventas_reales': 'sum',
520
  'ventas_predichas': 'sum'
521
  }).reset_index()
522
 
523
- # Identify manufacturers that exceeded predicted sales
524
- overperforming_manufacturers = grouped_results[grouped_results['ventas_reales'] > grouped_results['ventas_predichas']].copy()
525
-
526
- if not overperforming_manufacturers.empty:
527
- # Calculate the extra amount (difference between actual and predicted sales)
528
- overperforming_manufacturers['extra_amount'] = overperforming_manufacturers['ventas_reales'] - overperforming_manufacturers['ventas_predichas']
529
-
530
- # Sort by the highest extra amount
531
- overperforming_manufacturers = overperforming_manufacturers.sort_values(by='extra_amount', ascending=False)
532
-
533
- # Limit to top 10 overperforming manufacturers
534
- top_overperformers = overperforming_manufacturers.head(10)
535
-
536
- # Display two cards per row
537
- for i in range(0, len(top_overperformers), 2):
538
- cols = st.columns(2) # Create two columns for two cards in a row
539
-
540
- for j, col in enumerate(cols):
541
- if i + j < len(top_overperformers):
542
- row = top_overperformers.iloc[i + j]
543
- manufacturer_name = get_supplier_name_encoded(row['marca_id_encoded'])
544
- predicted = row['ventas_predichas']
545
- actual = row['ventas_reales']
546
- extra = row['extra_amount']
547
-
548
- # Use st.metric for compact display in each column
549
- with col:
550
- st.metric(
551
- label=f"{manufacturer_name}",
552
- value=f"{actual:.2f}€",
553
- delta=f"Exceeded by {extra:.2f}€",
554
- delta_color="normal"
555
- )
556
-
557
-
558
- # Radar chart logic remains the same
559
- customer_df = df[df["CLIENTE"] == str(customer_code)]
560
- all_manufacturers = customer_df.iloc[:, 1:].T
561
- all_manufacturers.index = all_manufacturers.index.astype(str)
562
-
563
- customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
564
- sales_data = customer_euros.iloc[:, 1:].T
565
- sales_data.index = sales_data.index.astype(str)
566
-
567
- sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
568
- sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
569
- all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
570
-
571
- top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
572
- top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
573
- combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
574
-
575
- combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
576
-
577
- if combined_top:
578
- combined_data = pd.DataFrame({
579
- 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
580
- 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
581
- }).fillna(0)
582
-
583
- combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
584
- non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
585
-
586
- if len(non_zero_manufacturers) < 3:
587
- zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
588
- manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
589
  else:
590
- manufacturers_to_show = non_zero_manufacturers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
- values = manufacturers_to_show['units'].tolist()
593
- amounts = manufacturers_to_show['sales'].tolist()
594
- manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
595
-
596
- if manufacturers:
597
- fig = radar_chart(manufacturers, values, amounts, f'Gráfico de radar para los {len(manufacturers)} principales fabricantes del cliente {customer_code}')
598
- st.pyplot(fig)
599
-
600
- # Column 2: Alerts and additional analysis
601
- with col2:
602
- st.subheader("¡Puede que tengas que revisar esto!")
603
- st.warning("Se esperaba que tu cliente comprara más productos de las siguientes marcas:")
604
-
605
- # Group results by manufacturer to calculate the total predicted and actual sales
606
- grouped_results = results.groupby('marca_id_encoded').agg({
607
- 'ventas_reales': 'sum',
608
- 'ventas_predichas': 'sum'
609
- }).reset_index()
610
-
611
- # Identify manufacturers that didn't meet predicted sales
612
- underperforming_manufacturers = grouped_results[grouped_results['ventas_reales'] < grouped_results['ventas_predichas']].copy()
613
-
614
- if not underperforming_manufacturers.empty:
615
- # Calculate the missed amount
616
- underperforming_manufacturers['missed_amount'] = underperforming_manufacturers['ventas_predichas'] - underperforming_manufacturers['ventas_reales']
617
-
618
- # Sort by the highest missed amount
619
- underperforming_manufacturers = underperforming_manufacturers.sort_values(by='missed_amount', ascending=False)
620
-
621
- # Limit to top 10 missed amounts
622
- top_misses = underperforming_manufacturers.head(10)
623
-
624
- # Display two cards per row
625
- for i in range(0, len(top_misses), 2):
626
- cols = st.columns(2) # Create two columns for two cards in a row
627
-
628
- for j, col in enumerate(cols):
629
- if i + j < len(top_misses):
630
- row = top_misses.iloc[i + j]
631
- manufacturer_name = get_supplier_name_encoded(row['marca_id_encoded'])
632
- predicted = row['ventas_predichas']
633
- actual = row['ventas_reales']
634
- missed = row['missed_amount']
635
-
636
- # Use st.metric for compact display in each column
637
- with col:
638
- st.metric(
639
- label=f"{manufacturer_name}",
640
- value=f"{actual:.2f}€",
641
- delta=f"Missed by {missed:.2f}€",
642
- delta_color="inverse"
643
- )
644
- else:
645
- st.success("All manufacturers have met or exceeded predicted sales.")
646
-
647
-
648
-
649
- # Gráfico de ventas anuales
650
- ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()
651
-
652
- sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
653
- if all(col in ventas_clientes.columns for col in sales_columns):
654
- customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
655
-
656
- if not customer_sales_data.empty:
657
- customer_sales = customer_sales_data[sales_columns].values[0]
658
- years = ['2021', '2022', '2023']
659
-
660
- # Convert 'fecha_mes' to datetime format if it's not already
661
- if not pd.api.types.is_datetime64_any_dtype(results['fecha_mes']):
662
- results['fecha_mes'] = pd.to_datetime(results['fecha_mes'], errors='coerce')
663
-
664
- # Add the 2024 actual and predicted data
665
- if 'ventas_predichas' in results.columns and 'ventas_reales' in results.columns:
666
- actual_sales_2024 = results[results['fecha_mes'].dt.year == 2024]['ventas_reales'].sum()
667
- predicted_sales_2024 = results[results['fecha_mes'].dt.year == 2024]['ventas_predichas'].sum()
668
-
669
- # Assuming only 9 months of actual data are available, annualize the sales
670
- months_available = 9
671
- actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12
672
-
673
- # Prepare data for the bar chart
674
- sales_values = list(customer_sales) + [actual_sales_2024_annual]
675
- predicted_values = list(customer_sales) + [predicted_sales_2024]
676
-
677
- years.append('2024')
678
-
679
- # Create the bar chart for historical and 2024 data
680
- fig_sales_bar = go.Figure()
681
- fig_sales_bar.add_trace(go.Bar(
682
- x=years[:3],
683
- y=sales_values[:3],
684
- name="Historical Sales",
685
- marker_color='blue'
686
- ))
687
-
688
- fig_sales_bar.add_trace(go.Bar(
689
- x=[years[3]],
690
- y=[sales_values[3]],
691
- name="2024 Actual Sales (Annualized)",
692
- marker_color='green'
693
- ))
694
-
695
- fig_sales_bar.add_trace(go.Bar(
696
- x=[years[3]],
697
- y=[predicted_values[3]],
698
- name="2024 Predicted Sales",
699
- marker_color='orange'
700
- ))
701
-
702
- # Customize layout
703
- fig_sales_bar.update_layout(
704
- title=f"Ventas anuales de tu cliente",
705
- xaxis_title="Year",
706
- yaxis_title="Sales (€)",
707
- barmode='group',
708
- height=600,
709
- legend_title_text="Sales Type",
710
- hovermode="x unified"
711
- )
712
-
713
- # Display the chart
714
- st.plotly_chart(fig_sales_bar, use_container_width=True)
715
-
716
- else:
717
- st.warning(f"No predicted or actual data found for customer {customer_code} for 2024.")
718
 
719
  # Customer Recommendations Page
720
  elif page == "💡 Recomendación de Artículos":
 
326
 
327
  if not customer_match.empty:
328
  cluster = customer_match['cluster_id'].values[0]
329
+
330
+ if fabricante_seleccionado == "Todos":
331
+ # Actuar como el comportamiento actual
332
+ with st.spinner(f"Seleccionando el modelo predictivo..."):
333
+ # Load the Corresponding Model
334
+ model_path = f'models/modelo_cluster_{cluster}.txt'
335
+ gbm = lgb.Booster(model_file=model_path)
336
+
337
+ with st.spinner("Preparando los datos..."):
338
+ # Load predict data for that cluster
339
+ predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
+ # Convert cliente_id to string
342
+ predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
343
+
344
+ with st.spinner("Filtrando data..."):
345
+ # Filter for the specific customer
346
+ customer_code_str = str(customer_code)
347
+ customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]
348
+
349
+ with st.spinner("Geneerando predicciones de venta..."):
350
+ if not customer_data.empty:
351
+ # Define features consistently with the training process
352
+ lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
353
+ features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
354
+
355
+ # Prepare data for prediction
356
+ X_predict = customer_data[features]
357
+
358
+ # Convert categorical features to 'category' dtype
359
+ categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
360
+ for feature in categorical_features:
361
+ X_predict[feature] = X_predict[feature].astype('category')
362
+
363
+ # Make Prediction for the selected customer
364
+ y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
365
+
366
+ # Reassemble the results
367
+ results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
368
+ results['ventas_predichas'] = y_pred
369
+
370
+ # Load actual data from df_agg_2024
371
+ actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
372
+
373
+ if not actual_sales.empty:
374
+ # Merge predictions with actual sales
375
+ results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
376
+ on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
377
+ how='left')
378
+ results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
379
+ else:
380
+ # If no actual sales data for 2024, fill 'ventas_reales' with 0
381
+ results['ventas_reales'] = 0
382
 
383
+ # Ensure any missing sales data is filled with 0
384
+ results['ventas_reales'].fillna(0, inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
+ # Define the cutoff date for the last 12 months
387
+ fecha_inicio = pd.to_datetime("2023-01-01")
388
+ fecha_corte = pd.to_datetime("2024-09-01")
389
 
390
+ # Convertir fecha_mes a datetime en el DataFrame historical_data
391
+ historical_data['fecha_mes'] = pd.to_datetime(historical_data['fecha_mes'], errors='coerce')
 
 
392
 
393
+ # Ensure cliente_id is of type string and strip any leading/trailing whitespace
394
+ historical_data['cliente_id'] = historical_data['cliente_id'].astype(str).str.strip()
395
+ customer_code_str = str(customer_code).strip() # Ensure the customer code is also properly formatted
396
 
397
+ filtered_historical_data = historical_data[historical_data['cliente_id'] == customer_code_str]
 
 
 
 
398
 
 
 
 
 
 
 
 
399
 
400
+ # Filtrar los datos históricos por cliente y por el rango de fechas (2023)
401
+ fecha_inicio_2023 = pd.to_datetime("2023-01-01")
402
+ fecha_fin_2023 = pd.to_datetime("2023-12-31")
403
 
404
+ datos_historicos = historical_data[
405
+ (historical_data['cliente_id'] == customer_code_str) &
406
+ (historical_data['fecha_mes'] >= fecha_inicio_2023) &
407
+ (historical_data['fecha_mes'] <= fecha_fin_2023)
408
+ ].groupby('fecha_mes')['precio_total'].sum().reset_index()
409
 
410
+ # Renombrar la columna 'precio_total' a 'ventas_historicas' si no está vacía
411
+ if not datos_historicos.empty:
412
+ datos_historicos.rename(columns={'precio_total': 'ventas_historicas'}, inplace=True)
413
+ else:
414
+ # Si los datos históricos están vacíos, generar fechas de 2023 con ventas_historicas = 0
415
+ fechas_2023 = pd.date_range(start='2023-01-01', end='2023-12-31', freq='M')
416
+ datos_historicos = pd.DataFrame({'fecha_mes': fechas_2023, 'ventas_historicas': [0] * len(fechas_2023)})
417
 
418
+ # Filtrar los datos de predicciones y ventas reales para 2024
419
+ datos_cliente_total = results.groupby('fecha_mes').agg({
420
  'ventas_reales': 'sum',
421
  'ventas_predichas': 'sum'
422
  }).reset_index()
423
 
424
+ # Asegurarnos de que fecha_mes en datos_cliente_total es datetime
425
+ datos_cliente_total['fecha_mes'] = pd.to_datetime(datos_cliente_total['fecha_mes'], errors='coerce')
426
+
427
+ # Generar un rango de fechas para 2024 si no hay predicciones
428
+ fechas_2024 = pd.date_range(start='2024-01-01', end='2024-12-31', freq='M')
429
+ fechas_df_2024 = pd.DataFrame({'fecha_mes': fechas_2024})
430
+
431
+ # Asegurarnos de que fecha_mes en fechas_df_2024 es datetime
432
+ fechas_df_2024['fecha_mes'] = pd.to_datetime(fechas_df_2024['fecha_mes'], errors='coerce')
433
+
434
+ # Combinar datos históricos con predicciones y ventas reales usando un merge
435
+ # Usamos how='outer' para asegurarnos de incluir todas las fechas de 2023 y 2024
436
+ datos_combinados = pd.merge(datos_historicos, datos_cliente_total, on='fecha_mes', how='outer').sort_values('fecha_mes')
437
+
438
+ # Rellenar los NaN: 0 en ventas_historicas donde faltan predicciones, y viceversa
439
+ datos_combinados['ventas_historicas'].fillna(0, inplace=True)
440
+ datos_combinados['ventas_predichas'].fillna(0, inplace=True)
441
+ datos_combinados['ventas_reales'].fillna(0, inplace=True)
442
+
443
+ # Crear la gráfica con Plotly
444
+ fig = go.Figure()
445
+
446
+ # Graficar ventas históricas
447
+ fig.add_trace(go.Scatter(
448
+ x=datos_combinados['fecha_mes'],
449
+ y=datos_combinados['ventas_historicas'],
450
+ mode='lines+markers',
451
+ name='Ventas Históricas',
452
+ line=dict(color='blue')
453
+ ))
454
+
455
+ # Graficar ventas predichas
456
+ fig.add_trace(go.Scatter(
457
+ x=datos_combinados['fecha_mes'],
458
+ y=datos_combinados['ventas_predichas'],
459
+ mode='lines+markers',
460
+ name='Ventas Predichas',
461
+ line=dict(color='orange')
462
+ ))
463
+
464
+ # Graficar ventas reales
465
+ fig.add_trace(go.Scatter(
466
+ x=datos_combinados['fecha_mes'],
467
+ y=datos_combinados['ventas_reales'],
468
+ mode='lines+markers',
469
+ name='Ventas Reales',
470
+ line=dict(color='green')
471
+ ))
472
+
473
+ # Personalizar el layout para enfocarse en 2023 y 2024
474
+ fig.update_layout(
475
+ title=f"Ventas Históricas, Predichas y Reales para Cliente {customer_code}",
476
+ xaxis_title="Fecha",
477
+ yaxis_title="Ventas (€)",
478
+ height=600,
479
+ xaxis_range=[fecha_inicio_2023, pd.to_datetime("2024-09-30")], # Ajustar el rango del eje x a 2023-2024
480
+ legend_title="Tipo de Ventas",
481
+ hovermode="x unified"
482
+ )
483
+
484
+ # Mostrar la gráfica en Streamlit
485
+ st.plotly_chart(fig)
486
+
487
+ # Calculate metrics for 2024 data
488
+ datos_2024 = datos_combinados[datos_combinados['fecha_mes'].dt.year == 2024]
489
+ actual = datos_2024['ventas_reales']
490
+ predicted = datos_2024['ventas_predichas']
491
+
492
def calculate_mape(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred``, in percent.

    Positions where the actual value is zero are excluded, because the
    percentage error is undefined there (division by zero).

    Args:
        y_true: Array-like of actual values (list, ndarray, pandas Series...).
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Returns:
        The MAPE as a float percentage, or NaN when no actual value is
        non-zero (nothing to evaluate).

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Coerce to float arrays so plain lists work and values are compared
    # position by position.
    actual_values = np.asarray(y_true, dtype=float)
    predicted_values = np.asarray(y_pred, dtype=float)

    if actual_values.shape != predicted_values.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got "
            f"{actual_values.shape} and {predicted_values.shape}"
        )

    non_zero = actual_values != 0
    if not non_zero.any():
        # Every actual value is zero: return NaN explicitly instead of
        # taking the mean of an empty selection.
        return float("nan")

    relative_errors = np.abs(
        (actual_values[non_zero] - predicted_values[non_zero])
        / actual_values[non_zero]
    )
    return float(np.mean(relative_errors) * 100)
495
+
496
+ mae = mean_absolute_error(actual, predicted)
497
+ mse = mean_squared_error(actual, predicted)
498
+ rmse = np.sqrt(mse)
499
+ mape = calculate_mape(actual, predicted)
500
+ smape = np.mean(2 * np.abs(actual - predicted) / (np.abs(actual) + np.abs(predicted))) * 100
501
+
502
+ # Display metrics
503
+ st.subheader("Métricas de Predicción (2024)")
504
+ col1, col2, col3, col4 = st.columns(4)
505
+ col1.metric("MAE", f"{mae:.2f} €",help="Promedio de la diferencia absoluta entre las predicciones y los valores reales.")
506
+ col2.metric("MAPE", f"{mape:.2f}%",help="Porcentaje promedio de error en las predicciones.")
507
+ col3.metric("RMSE", f"{rmse:.2f} €",help="Medida de la desviación estándar de los residuos de predicción.")
508
+ col4.metric("SMAPE", f"{smape:.2f}%",help="Alternativa al MAPE que maneja mejor los valores cercanos a cero.")
509
+
510
+
511
+ # Split space into two columns
512
+ col1, col2 = st.columns(2)
513
+
514
+ # Column 1: Radar chart for top manufacturers
515
+ with col1:
516
+ st.subheader("¡Esto tiene buena pinta!")
517
+ st.info("Su cliente ha superado las ventas predichas de las siguientes marcas:")
518
+
519
+ # Group results by manufacturer to calculate the total predicted and actual sales
520
+ grouped_results = results.groupby('marca_id_encoded').agg({
521
+ 'ventas_reales': 'sum',
522
+ 'ventas_predichas': 'sum'
523
+ }).reset_index()
524
+
525
+ # Identify manufacturers that exceeded predicted sales
526
+ overperforming_manufacturers = grouped_results[grouped_results['ventas_reales'] > grouped_results['ventas_predichas']].copy()
527
+
528
+ if not overperforming_manufacturers.empty:
529
+ # Calculate the extra amount (difference between actual and predicted sales)
530
+ overperforming_manufacturers['extra_amount'] = overperforming_manufacturers['ventas_reales'] - overperforming_manufacturers['ventas_predichas']
531
+
532
+ # Sort by the highest extra amount
533
+ overperforming_manufacturers = overperforming_manufacturers.sort_values(by='extra_amount', ascending=False)
534
+
535
+ # Limit to top 10 overperforming manufacturers
536
+ top_overperformers = overperforming_manufacturers.head(10)
537
+
538
+ # Display two cards per row
539
+ for i in range(0, len(top_overperformers), 2):
540
+ cols = st.columns(2) # Create two columns for two cards in a row
541
+
542
+ for j, col in enumerate(cols):
543
+ if i + j < len(top_overperformers):
544
+ row = top_overperformers.iloc[i + j]
545
+ manufacturer_name = get_supplier_name_encoded(row['marca_id_encoded'])
546
+ predicted = row['ventas_predichas']
547
+ actual = row['ventas_reales']
548
+ extra = row['extra_amount']
549
+
550
+ # Use st.metric for compact display in each column
551
+ with col:
552
+ st.metric(
553
+ label=f"{manufacturer_name}",
554
+ value=f"{actual:.2f}€",
555
+ delta=f"Exceeded by {extra:.2f}€",
556
+ delta_color="normal"
557
+ )
558
+
559
+
560
+ # Radar chart logic remains the same
561
+ customer_df = df[df["CLIENTE"] == str(customer_code)]
562
+ all_manufacturers = customer_df.iloc[:, 1:].T
563
+ all_manufacturers.index = all_manufacturers.index.astype(str)
564
+
565
+ customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
566
+ sales_data = customer_euros.iloc[:, 1:].T
567
+ sales_data.index = sales_data.index.astype(str)
568
+
569
+ sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
570
+ sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
571
+ all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
572
+
573
+ top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
574
+ top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
575
+ combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
576
+
577
+ combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
578
+
579
+ if combined_top:
580
+ combined_data = pd.DataFrame({
581
+ 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
582
+ 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
583
+ }).fillna(0)
584
+
585
+ combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
586
+ non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
587
+
588
+ if len(non_zero_manufacturers) < 3:
589
+ zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
590
+ manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
591
+ else:
592
+ manufacturers_to_show = non_zero_manufacturers
593
+
594
+ values = manufacturers_to_show['units'].tolist()
595
+ amounts = manufacturers_to_show['sales'].tolist()
596
+ manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
597
+
598
+ if manufacturers:
599
+ fig = radar_chart(manufacturers, values, amounts, f'Gráfico de radar para los {len(manufacturers)} principales fabricantes del cliente {customer_code}')
600
+ st.pyplot(fig)
601
+
602
# Column 2: alerts for manufacturers whose actual sales fell short of the
# predicted sales for this customer.
with col2:
    st.subheader("¡Puede que tengas que revisar esto!")

    # Total predicted and actual sales per (encoded) manufacturer.
    grouped_results = results.groupby('marca_id_encoded').agg({
        'ventas_reales': 'sum',
        'ventas_predichas': 'sum'
    }).reset_index()

    # Manufacturers whose actual sales are strictly below the prediction.
    underperforming_manufacturers = grouped_results[
        grouped_results['ventas_reales'] < grouped_results['ventas_predichas']
    ].copy()

    if underperforming_manufacturers.empty:
        st.success("All manufacturers have met or exceeded predicted sales.")
    else:
        # Only announce "expected more purchases from these brands" when
        # there is something to list; showing it before the success message
        # above would contradict it.
        st.warning("Se esperaba que tu cliente comprara más productos de las siguientes marcas:")

        # Shortfall between predicted and actual sales.
        underperforming_manufacturers['missed_amount'] = (
            underperforming_manufacturers['ventas_predichas']
            - underperforming_manufacturers['ventas_reales']
        )

        # Largest shortfalls first, limited to the top 10.
        underperforming_manufacturers = underperforming_manufacturers.sort_values(
            by='missed_amount', ascending=False
        )
        top_misses = underperforming_manufacturers.head(10)

        # Render two metric cards per row; an odd final row fills only the
        # first of its two columns because zip stops at the shorter input.
        for start in range(0, len(top_misses), 2):
            cols = st.columns(2)
            row_pair = top_misses.iloc[start:start + 2]

            for col, (_, row) in zip(cols, row_pair.iterrows()):
                manufacturer_name = get_supplier_name_encoded(row['marca_id_encoded'])
                actual = row['ventas_reales']
                missed = row['missed_amount']

                # st.metric gives a compact card: actual sales as the value,
                # the shortfall as the delta.
                with col:
                    st.metric(
                        label=f"{manufacturer_name}",
                        value=f"{actual:.2f}€",
                        delta=f"Missed by {missed:.2f}€",
                        delta_color="inverse"
                    )
648
+
649
+
650
+
651
# Annual sales chart: historical yearly sales (2021-2023) for the customer
# plus 2024 actual (annualized) and 2024 predicted totals taken from `results`.
ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()

sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
if all(column in ventas_clientes.columns for column in sales_columns):
    # Normalize the lookup key the same way as the column above; comparing a
    # non-string or padded customer code against stripped strings would
    # never match and the chart would silently not render.
    customer_key = str(customer_code).strip()
    customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_key]

    if not customer_sales_data.empty:
        # First matching row holds the yearly totals for this customer.
        customer_sales = customer_sales_data[sales_columns].values[0]
        years = ['2021', '2022', '2023']

        # Convert 'fecha_mes' to datetime if it is not already; unparseable
        # values become NaT and therefore drop out of the 2024 filter below.
        if not pd.api.types.is_datetime64_any_dtype(results['fecha_mes']):
            results['fecha_mes'] = pd.to_datetime(results['fecha_mes'], errors='coerce')

        # Add the 2024 actual and predicted data.
        if 'ventas_predichas' in results.columns and 'ventas_reales' in results.columns:
            results_2024 = results[results['fecha_mes'].dt.year == 2024]
            actual_sales_2024 = results_2024['ventas_reales'].sum()
            predicted_sales_2024 = results_2024['ventas_predichas'].sum()

            # NOTE(review): hard-coded assumption that exactly 9 months of
            # actual 2024 data are available; the annualized figure is wrong
            # if the data covers a different number of months.
            months_available = 9
            actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12

            # Historical values followed by the 2024 figure for each series.
            sales_values = list(customer_sales) + [actual_sales_2024_annual]
            predicted_values = list(customer_sales) + [predicted_sales_2024]

            years.append('2024')

            # Grouped bar chart: three historical bars, then the two 2024 bars.
            fig_sales_bar = go.Figure()
            fig_sales_bar.add_trace(go.Bar(
                x=years[:3],
                y=sales_values[:3],
                name="Historical Sales",
                marker_color='blue'
            ))

            fig_sales_bar.add_trace(go.Bar(
                x=[years[3]],
                y=[sales_values[3]],
                name="2024 Actual Sales (Annualized)",
                marker_color='green'
            ))

            fig_sales_bar.add_trace(go.Bar(
                x=[years[3]],
                y=[predicted_values[3]],
                name="2024 Predicted Sales",
                marker_color='orange'
            ))

            # Customize layout.
            fig_sales_bar.update_layout(
                title="Ventas anuales de tu cliente",
                xaxis_title="Year",
                yaxis_title="Sales (€)",
                barmode='group',
                height=600,
                legend_title_text="Sales Type",
                hovermode="x unified"
            )

            # Display the chart.
            st.plotly_chart(fig_sales_bar, use_container_width=True)

        else:
            st.warning(f"No predicted or actual data found for customer {customer_code} for 2024.")
720
 
721
+ else:
722
+ with st.spinner(f"Mostrando datos para el fabricante {fabricante_seleccionado}..."):
723
+ # Mostrar el cliente y el fabricante seleccionados
724
+ st.write(f"**Cliente seleccionado:** {customer_code}")
725
+ st.write(f"**Fabricante seleccionado:** {fabricante_seleccionado}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
 
727
  # Customer Recommendations Page
728
  elif page == "💡 Recomendación de Artículos":