Spaces:

GMARTINEZMILLA
/

Final_Project

Sleeping

App Files Files Community

Final_Project / app.py

GMARTINEZMILLA

feat: updated website

ca83516 about 2 months ago

raw

history blame

58.2 kB

	import streamlit as st
	import time
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import matplotlib.pyplot as plt
	import numpy as np
	import lightgbm as lgb
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	from sklearn.metrics import mean_absolute_error, mean_squared_error
	from joblib import dump, load
	from utils import recomienda_tfid

	# Page configuration
	st.set_page_config(page_title="DeepInsightz", page_icon=":bar_chart:", layout="wide")

	# Custom CSS for dynamic theme styling
	# Streamlit detects light and dark mode automatically via the user's settings in Hugging Face Spaces
	if st.get_option("theme.base") == "dark":
	background_color = "#282828"
	text_color = "white"
	metric_box_color = "#4f4f4f"
	sidebar_color = "#282828"
	plot_bgcolor = "rgba(0, 0, 0, 0)"
	primary_color = '#00FF00' # for positive delta
	negative_color = '#FF0000' # for negative delta
	else:
	background_color = "#f4f4f4"
	text_color = "#black"
	metric_box_color = "#dee2e8"
	sidebar_color = "#dee2e8"
	plot_bgcolor = "#f4f4f4"
	primary_color = '#228B22' # for positive delta in light mode
	negative_color = '#8B0000' # for negative delta in light mode

	st.markdown(f"""
	<style>
	body {{
	background-color: {background_color};
	color: {text_color};
	}}
	[data-testid="stMetric"] {{
	background-color: {metric_box_color};
	border-radius: 10px;
	text-align: center;
	padding: 15px 0;
	margin-bottom: 20px;
	}}
	[data-testid="stMetricLabel"] {{
	display: flex;
	justify-content: center;
	align-items: center;
	color: {text_color};
	}}
	[data-testid="stSidebar"] {{
	background-color: {sidebar_color};
	}}
	</style>
	""", unsafe_allow_html=True)


	# Load CSV files at the top
	df = pd.read_csv("df_clean.csv")
	nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
	euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
	ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
	customer_clusters = pd.read_csv('predicts/customer_clusters.csv') # Load the customer clusters here
	df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
	pca_data_5 = pd.read_csv('pca_data.csv')
	historical_data = pd.read_csv('historical_data.csv')

	with st.sidebar:
	st.sidebar.title("DeepInsightz")
	page = st.sidebar.selectbox("Select the tool you want to use", ["Summary", "Customer Analysis", "Articles Recommendations"])

	# If the user selects "Customer Analysis," show filter options in the sidebar
	if page == "Customer Analysis":
	st.sidebar.title("Filter Options")
	show_all = st.sidebar.checkbox('Show All Manufacturers', value=True)

	# If not showing all, allow filtering by manufacturer code
	if not show_all:
	selected_manufacturer = st.sidebar.selectbox(
	'Select Manufacturer Code',
	historical_data['marca_id_encoded'].unique()
	)

	# Generamos la columna total_sales
	ventas_clientes['total_sales'] = ventas_clientes[['VENTA_2021', 'VENTA_2022', 'VENTA_2023']].sum(axis=1)
	# Ordenar los clientes de mayor a menor según sus ventas totales
	ventas_top_100 = ventas_clientes.sort_values(by='total_sales', ascending=False).head(100)


	# Ensure customer codes are strings
	df['CLIENTE'] = df['CLIENTE'].astype(str)
	nombres_proveedores['codigo'] = nombres_proveedores['codigo'].astype(str)
	euros_proveedor['CLIENTE'] = euros_proveedor['CLIENTE'].astype(str)
	customer_clusters['cliente_id'] = customer_clusters['cliente_id'].astype(str) # Ensure customer IDs are strings
	fieles_df = pd.read_csv("clientes_relevantes.csv")
	cestas = pd.read_csv("cestas.csv")
	productos = pd.read_csv("productos.csv")
	df_agg_2024['cliente_id'] = df_agg_2024['cliente_id'].astype(str)
	marca_id_mapping = load('marca_id_mapping.joblib')

	# Convert all columns except 'CLIENTE' to float in euros_proveedor
	for col in euros_proveedor.columns:
	if col != 'CLIENTE':
	euros_proveedor[col] = pd.to_numeric(euros_proveedor[col], errors='coerce')

	# Check for NaN values after conversion
	if euros_proveedor.isna().any().any():
	st.warning("Some values in euros_proveedor couldn't be converted to numbers. Please review the input data.")

	# Ignore the last two columns of df
	df = df.iloc[:, :-2]

	# Function to get supplier name
	def get_supplier_name(code):
	code = str(code) # Ensure code is a string
	name = nombres_proveedores[nombres_proveedores['codigo'] == code]['nombre'].values
	return name[0] if len(name) > 0 else code

	def get_supplier_name_encoded(encoded_code):
	try:
	# Ensure the encoded code is an integer
	encoded_code = int(encoded_code)
	print(f"Encoded Code: {encoded_code}")

	# Use the label encoder to map the encoded code back to the original manufacturer code
	if encoded_code < len(marca_id_mapping.classes_):
	real_code = marca_id_mapping.inverse_transform([encoded_code])[0]
	print(f"Real Manufacturer Code: {real_code}")
	else:
	print(f"Encoded code not found in the label encoder: {encoded_code}")
	return f"Unknown code: {encoded_code}" # Handle case where encoded code is not found

	# Now, use the real_code to find the manufacturer name in nombres_proveedores
	name = nombres_proveedores[nombres_proveedores['codigo'] == str(real_code)]['nombre'].values
	print(f"Manufacturer Name Found: {name}") # Check what name is returned

	# Return the manufacturer name if found, otherwise return the real_code
	return name[0] if len(name) > 0 else real_code

	except Exception as e:
	print(f"Error encountered: {e}")
	return f"Error for code: {encoded_code}"

	# Custom Donut Chart with Plotly for Inbound/Outbound Percentage
	def create_donut_chart(values, labels, color_scheme, title):
	fig = px.pie(
	values=values,
	names=labels,
	hole=0.7,
	color_discrete_sequence=color_scheme
	)
	fig.update_traces(textinfo='percent+label', hoverinfo='label+percent', textposition='inside', showlegend=False)
	fig.update_layout(
	annotations=[dict(text=f"{int(values[1])}%", x=0.5, y=0.5, font_size=40, showarrow=False)],
	title=title,
	height=300,
	margin=dict(t=30, b=10, l=10, r=10),
	paper_bgcolor=plot_bgcolor, # Use theme-dependent background color
	plot_bgcolor=plot_bgcolor
	)
	return fig

	# Donut chart with color scheme based on theme
	if st.get_option("theme.base") == "dark":
	donut_color_scheme = ['#155F7A', '#29b5e8'] # Dark mode colors
	else:
	donut_color_scheme = ['#007BFF', '#66b5ff'] # Light mode colors

	# Function to create radar chart with square root transformation
	def radar_chart(categories, values, amounts, title):
	N = len(categories)
	angles = [n / float(N) * 2 * np.pi for n in range(N)]
	angles += angles[:1]

	fig, ax = plt.subplots(figsize=(12, 12), subplot_kw=dict(projection='polar'))

	# Apply square root transformation
	sqrt_values = np.sqrt(values)
	sqrt_amounts = np.sqrt(amounts)

	max_sqrt_value = max(sqrt_values)
	normalized_values = [v / max_sqrt_value for v in sqrt_values]

	# Adjust scaling for spend values
	max_sqrt_amount = max(sqrt_amounts)
	scaling_factor = 0.7 # Adjust this value to control how much the spend values are scaled up
	normalized_amounts = [min((a / max_sqrt_amount) * scaling_factor, 1.0) for a in sqrt_amounts]

	normalized_values += normalized_values[:1]
	ax.plot(angles, normalized_values, 'o-', linewidth=2, color='#FF69B4', label='% Units (sqrt)')
	ax.fill(angles, normalized_values, alpha=0.25, color='#FF69B4')

	normalized_amounts += normalized_amounts[:1]
	ax.plot(angles, normalized_amounts, 'o-', linewidth=2, color='#4B0082', label='% Spend (sqrt)')
	ax.fill(angles, normalized_amounts, alpha=0.25, color='#4B0082')

	ax.set_xticks(angles[:-1])
	ax.set_xticklabels(categories, size=8, wrap=True)
	ax.set_ylim(0, 1)

	circles = np.linspace(0, 1, 5)
	for circle in circles:
	ax.plot(angles, [circle]*len(angles), '--', color='gray', alpha=0.3, linewidth=0.5)

	ax.set_yticklabels([])
	ax.spines['polar'].set_visible(False)

	plt.title(title, size=16, y=1.1)
	plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))

	return fig



	if page == "Summary":
	# st.title("Welcome to DeepInsightz")
	# st.markdown("""
	# ### Data-driven Customer Clustering
	# We analyzed thousands of customers and suppliers to help businesses make smarter sales decisions.
	# """)

	# Create layout with three columns
	col1, col2, col3 = st.columns((1.5, 4, 2.5), gap='medium')

	# Left Column (Red): Metrics and Donut Charts
	with col1:
	st.markdown('#### General Information')
	st.metric(label="Range of Dates", value="2021-2023")
	st.metric(label="Customers Analysed", value="3.000")
	st.metric(label="Unique Products Sold", value="10.702")
	st.metric(label="Total Sales Instances", value="764.396")



	# Middle Column (White): 3D Cluster Model and Bar Chart
	with col2:
	st.markdown('#### 3D Customer Clusters')

	# Create 3D PCA plot using actual data from pca_data_5
	fig_cluster = px.scatter_3d(
	pca_data_5,
	x='PC1',
	y='PC2',
	z='PC3',
	color='cluster_id',
	hover_name='CustomerID',
	)
	fig_cluster.update_layout(
	scene=dict(aspectratio=dict(x=1, y=1, z=0.8)), # Adjusted aspect ratio for better balance
	margin=dict(t=10, b=10, l=10, r=10), # Tighten margins further
	height=600, # Slightly increased height for better visibility
	)
	st.plotly_chart(fig_cluster, use_container_width=True)

	# Right Column (Blue): Key Metrics Overview and Data Preparation Summary
	with col3:
	# Mostrar la tabla con los 100 mejores clientes
	st.markdown('#### Top 100 Clients by Total Sales')

	# Configurar columnas para mostrar los clientes y las ventas totales
	st.dataframe(ventas_top_100[['codigo_cliente', 'total_sales']],
	column_order=("codigo_cliente", "total_sales"),
	hide_index=True,
	width=450, # Ajustar el ancho de la tabla
	height=600, # Ajustar la altura de la tabla
	column_config={
	"codigo_cliente": st.column_config.TextColumn(
	"Client Code",
	),
	"total_sales": st.column_config.ProgressColumn(
	"Total Sales (€)",
	format="%d",
	min_value=0,
	max_value=ventas_top_100['total_sales'].max()
	)}
	)
	# Customer Analysis Page
	elif page == "Customer Analysis":
	st.markdown("""
	<h2 style='text-align: center; font-size: 2.5rem;'>Customer Analysis</h2>
	<p style='text-align: center; font-size: 1.2rem; color: gray;'>
	Enter the customer code to explore detailed customer insights,
	including past sales, predictions for the current year, and manufacturer-specific information.
	</p>
	""", unsafe_allow_html=True)

	# Combine text input and dropdown into a single searchable selectbox
	customer_code = st.selectbox(
	"Search and Select Customer Code",
	df['CLIENTE'].unique(), # All customer codes

	format_func=lambda x: str(x), # Ensures the values are displayed as strings
	help="Start typing to search for a specific customer code"
	)

	if st.button("Calcular"):
	if customer_code:
	with st.spinner("We are identifying the customer's cluster..."):
	# Find Customer's Cluster
	customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
	time.sleep(1)

	if not customer_match.empty:
	cluster = customer_match['cluster_id'].values[0]

	with st.spinner(f"Selecting predictive model..."):
	# Load the Corresponding Model
	model_path = f'models/modelo_cluster_{cluster}.txt'
	gbm = lgb.Booster(model_file=model_path)

	with st.spinner("Getting the data ready..."):
	# Load predict data for that cluster
	predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')

	# Convert cliente_id to string
	predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)

	with st.spinner("Filtering data..."):
	# Filter for the specific customer
	customer_code_str = str(customer_code)
	customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]

	with st.spinner("Generating sales predictions..."):
	if not customer_data.empty:
	# Define features consistently with the training process
	lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
	features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']

	# Prepare data for prediction
	X_predict = customer_data[features]

	# Convert categorical features to 'category' dtype
	categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
	for feature in categorical_features:
	X_predict[feature] = X_predict[feature].astype('category')

	# Make Prediction for the selected customer
	y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)

	# Reassemble the results
	results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
	results['ventas_predichas'] = y_pred

	# Load actual data from df_agg_2024
	actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]

	if not actual_sales.empty:
	# Merge predictions with actual sales
	results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
	on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
	how='left')
	results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
	else:
	# If no actual sales data for 2024, fill 'ventas_reales' with 0
	results['ventas_reales'] = 0

	# Ensure any missing sales data is filled with 0
	results['ventas_reales'].fillna(0, inplace=True)

	# Define the cutoff date for the last 12 months
	fecha_inicio = pd.to_datetime("2023-01-01")
	fecha_corte = pd.to_datetime("2024-09-01")

	# Convertir fecha_mes a datetime en ambos DataFrames
	historical_data['fecha_mes'] = pd.to_datetime(historical_data['fecha_mes'], errors='coerce')
	results['fecha_mes'] = pd.to_datetime(results['fecha_mes'], errors='coerce')

	# Filtrar los datos históricos por cliente y por el rango de fechas (2023)
	fecha_inicio_2023 = pd.to_datetime("2023-01-01")
	fecha_fin_2023 = pd.to_datetime("2023-12-31")

	datos_historicos = historical_data[
	(historical_data['cliente_id'] == customer_code_str) &
	(historical_data['fecha_mes'] >= fecha_inicio_2023) &
	(historical_data['fecha_mes'] <= fecha_fin_2023)
	].groupby('fecha_mes')['precio_total'].sum().reset_index()

	# Asegurarnos de que fecha_mes en datos_historicos es datetime
	datos_historicos['fecha_mes'] = pd.to_datetime(datos_historicos['fecha_mes'], errors='coerce')

	# Generar un rango de fechas desde enero 2023 hasta diciembre 2023 si no hay datos
	if datos_historicos.empty:
	fechas_2023 = pd.date_range(start='2023-01-01', end='2023-12-31', freq='M')
	datos_historicos = pd.DataFrame({'fecha_mes': fechas_2023, 'ventas_historicas': [0] * len(fechas_2023)})

	# Renombrar la columna 'precio_total' a 'ventas_historicas' si hay datos
	else:
	datos_historicos.rename(columns={'precio_total': 'ventas_historicas'}, inplace=True)

	# Filtrar los datos de predicciones y ventas reales para 2024
	datos_cliente_total = results.groupby('fecha_mes').agg({
	'ventas_reales': 'sum',
	'ventas_predichas': 'sum'
	}).reset_index()

	# Asegurarnos de que fecha_mes en datos_cliente_total es datetime
	datos_cliente_total['fecha_mes'] = pd.to_datetime(datos_cliente_total['fecha_mes'], errors='coerce')

	# Generar un rango de fechas para 2024 si no hay predicciones
	fechas_2024 = pd.date_range(start='2024-01-01', end='2024-12-31', freq='M')
	fechas_df_2024 = pd.DataFrame({'fecha_mes': fechas_2024})

	# Asegurarnos de que fecha_mes en fechas_df_2024 es datetime
	fechas_df_2024['fecha_mes'] = pd.to_datetime(fechas_df_2024['fecha_mes'], errors='coerce')

	# Combinar datos históricos con predicciones y ventas reales usando un merge
	datos_combinados = pd.merge(fechas_df_2024, datos_cliente_total, on='fecha_mes', how='outer').sort_values('fecha_mes')

	# Rellenar los NaN: 0 en ventas_historicas donde faltan predicciones, y viceversa
	datos_combinados['ventas_historicas'].fillna(0, inplace=True)
	datos_combinados['ventas_predichas'].fillna(0, inplace=True)
	datos_combinados['ventas_reales'].fillna(0, inplace=True)

	# Mostrar los datos combinados
	st.subheader("Datos combinados con valores faltantes llenados")
	st.write(datos_combinados)

	# Generar la gráfica
	st.markdown("### Sales History, Predictions, and Real Sales")

	# Crear la gráfica con Plotly
	fig = go.Figure()

	# Graficar ventas históricas
	fig.add_trace(go.Scatter(
	x=datos_combinados['fecha_mes'],
	y=datos_combinados['ventas_historicas'],
	mode='lines+markers',
	name='Ventas Históricas',
	line=dict(color='blue')
	))

	# Graficar ventas predichas
	fig.add_trace(go.Scatter(
	x=datos_combinados['fecha_mes'],
	y=datos_combinados['ventas_predichas'],
	mode='lines+markers',
	name='Ventas Predichas',
	line=dict(color='orange')
	))

	# Graficar ventas reales
	fig.add_trace(go.Scatter(
	x=datos_combinados['fecha_mes'],
	y=datos_combinados['ventas_reales'],
	mode='lines+markers',
	name='Ventas Reales',
	line=dict(color='green')
	))

	# Personalizar el layout para enfocarse en 2023 y 2024
	fig.update_layout(
	title=f"Ventas Históricas, Predichas y Reales para Cliente {customer_code}",
	xaxis_title="Fecha",
	yaxis_title="Ventas (€)",
	height=600,
	xaxis_range=[fecha_inicio_2023, pd.to_datetime("2024-09-30")], # Ajustar el rango del eje x a 2023-2024
	legend_title="Tipo de Ventas",
	hovermode="x unified"
	)

	# Mostrar la gráfica en Streamlit
	st.plotly_chart(fig)


	# Split space into two columns
	col1, col2 = st.columns(2)

	# Column 1: Radar chart for top manufacturers
	with col1:
	st.subheader("This looks great!")
	st.info("Your customer did exceed predicted sales from the following brands:")

	# Identify manufacturers that exceeded predicted sales
	overperforming_manufacturers = results[results['ventas_reales'] > results['ventas_predichas']].copy()

	if not overperforming_manufacturers.empty:
	# Calculate the extra amount (difference between actual and predicted sales)
	overperforming_manufacturers['extra_amount'] = overperforming_manufacturers['ventas_reales'] - overperforming_manufacturers['ventas_predichas']

	# Sort by the highest extra amount
	overperforming_manufacturers = overperforming_manufacturers.sort_values(by='extra_amount', ascending=False)

	# Limit to top 10 overperforming manufacturers
	top_overperformers = overperforming_manufacturers.head(10)

	# Display two cards per row
	for i in range(0, len(top_overperformers), 2):
	cols = st.columns(2) # Create two columns for two cards in a row

	for j, col in enumerate(cols):
	if i + j < len(top_overperformers):
	row = top_overperformers.iloc[i + j]
	manufacturer_name = get_supplier_name_encoded(row['marca_id_encoded'])
	predicted = row['ventas_predichas']
	actual = row['ventas_reales']
	extra = row['extra_amount']

	# Use st.metric for compact display in each column
	with col:
	st.metric(
	label=f"{manufacturer_name}",
	value=f"{actual:.2f}€",
	delta=f"Exceeded by {extra:.2f}€",
	delta_color="normal"
	)


	# Radar chart logic remains the same
	customer_df = df[df["CLIENTE"] == str(customer_code)]
	all_manufacturers = customer_df.iloc[:, 1:].T
	all_manufacturers.index = all_manufacturers.index.astype(str)

	customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
	sales_data = customer_euros.iloc[:, 1:].T
	sales_data.index = sales_data.index.astype(str)

	sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
	sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
	all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')

	top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
	top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
	combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]

	combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]

	if combined_top:
	combined_data = pd.DataFrame({
	'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
	'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
	}).fillna(0)

	combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
	non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]

	if len(non_zero_manufacturers) < 3:
	zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
	manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
	else:
	manufacturers_to_show = non_zero_manufacturers

	values = manufacturers_to_show['units'].tolist()
	amounts = manufacturers_to_show['sales'].tolist()
	manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]

	if manufacturers:
	fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
	st.pyplot(fig)

	# Column 2: Alerts and additional analysis
	with col2:
	st.subheader("You might need to check this!")
	st.warning("Your customer was expected to buy more products from the following brands:")

	# Identify manufacturers that didn't meet predicted sales
	underperforming_manufacturers = results[results['ventas_reales'] < results['ventas_predichas']].copy()

	if not underperforming_manufacturers.empty:
	# Calculate the missed amount
	underperforming_manufacturers['missed_amount'] = underperforming_manufacturers['ventas_predichas'] - underperforming_manufacturers['ventas_reales']

	# Sort by the highest missed amount
	underperforming_manufacturers = underperforming_manufacturers.sort_values(by='missed_amount', ascending=False)

	# Limit to top 10 missed amounts
	top_misses = underperforming_manufacturers.head(10)

	# Display two cards per row
	for i in range(0, len(top_misses), 2):
	cols = st.columns(2) # Create two columns for two cards in a row

	for j, col in enumerate(cols):
	if i + j < len(top_misses):
	row = top_misses.iloc[i + j]
	manufacturer_name = get_supplier_name_encoded(row['marca_id_encoded'])
	predicted = row['ventas_predichas']
	actual = row['ventas_reales']
	missed = row['missed_amount']

	# Use st.metric for compact display in each column
	with col:
	st.metric(
	label=f"{manufacturer_name}",
	value=f"{actual:.2f}€",
	delta=f"Missed by {missed:.2f}€",
	delta_color="inverse"
	)
	else:
	st.success("All manufacturers have met or exceeded predicted sales.")

	# Gráfico adicional: Comparar las ventas predichas y reales para los principales fabricantes
	st.markdown("### Predicted vs Actual Sales for Top Manufacturers")
	top_manufacturers = results.groupby('marca_id_encoded').agg({'ventas_reales': 'sum', 'ventas_predichas': 'sum'}).sort_values(by='ventas_reales', ascending=False).head(10)

	fig_comparison = go.Figure()
	fig_comparison.add_trace(go.Bar(x=top_manufacturers.index, y=top_manufacturers['ventas_reales'], name="Actual Sales", marker_color='blue'))
	fig_comparison.add_trace(go.Bar(x=top_manufacturers.index, y=top_manufacturers['ventas_predichas'], name="Predicted Sales", marker_color='orange'))

	fig_comparison.update_layout(
	title="Actual vs Predicted Sales by Top Manufacturers",
	xaxis_title="Manufacturer",
	yaxis_title="Sales (€)",
	barmode='group',
	height=400,
	hovermode="x unified"
	)

	st.plotly_chart(fig_comparison, use_container_width=True)

	# Gráfico de ventas anuales
	ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()

	sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
	if all(col in ventas_clientes.columns for col in sales_columns):
	customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]

	if not customer_sales_data.empty:
	customer_sales = customer_sales_data[sales_columns].values[0]
	years = ['2021', '2022', '2023']

	# Add the 2024 actual and predicted data
	if 'ventas_predichas' in results.columns and 'ventas_reales' in results.columns:
	actual_sales_2024 = results[results['fecha_mes'].dt.year == 2024]['ventas_reales'].sum()
	predicted_sales_2024 = results[results['fecha_mes'].dt.year == 2024]['ventas_predichas'].sum()

	months_available = 9
	actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12

	sales_values = list(customer_sales) + [actual_sales_2024_annual]
	predicted_values = list(customer_sales) + [predicted_sales_2024]

	years.append('2024')

	fig_sales_bar = go.Figure()
	fig_sales_bar.add_trace(go.Bar(
	x=years[:3],
	y=sales_values[:3],
	name="Historical Sales",
	marker_color='blue'
	))

	fig_sales_bar.add_trace(go.Bar(
	x=[years[3]],
	y=[sales_values[3]],
	name="2024 Actual Sales (Annualized)",
	marker_color='green'
	))

	fig_sales_bar.add_trace(go.Bar(
	x=[years[3]],
	y=[predicted_values[3]],
	name="2024 Predicted Sales",
	marker_color='orange'
	))

	fig_sales_bar.update_layout(
	title=f"Sales Over the Years for Customer {customer_code}",
	xaxis_title="Year",
	yaxis_title="Sales (€)",
	barmode='group',
	height=600,
	legend_title_text="Sales Type",
	hovermode="x unified"
	)

	st.plotly_chart(fig_sales_bar, use_container_width=True)

	else:
	st.warning(f"No predicted or actual data found for customer {customer_code} for 2024.")



	# elif page == "Customer Analysis":
	# st.markdown("""
	# <h2 style='text-align: center; font-size: 2.5rem;'>Customer Analysis</h2>
	# <p style='text-align: center; font-size: 1.2rem; color: gray;'>
	# Enter the customer code to explore detailed customer insights,
	# including past sales, predictions for the current year, and manufacturer-specific information.
	# </p>
	# """, unsafe_allow_html=True)

	# # Combine text input and dropdown into a single searchable selectbox
	# customer_code = st.selectbox(
	# "Search and Select Customer Code",
	# df['CLIENTE'].unique(), # All customer codes
	# format_func=lambda x: str(x), # Ensures the values are displayed as strings
	# help="Start typing to search for a specific customer code"
	# )

	# if st.button("Calcular"):
	# if customer_code:
	# with st.spinner("We are identifying the customer's cluster..."):
	# # Find Customer's Cluster
	# customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
	# time.sleep(1)


	# if not customer_match.empty:
	# cluster = customer_match['cluster_id'].values[0]

	# with st.spinner(f"Selecting predictive model..."):
	# # Load the Corresponding Model
	# model_path = f'models/modelo_cluster_{cluster}.txt'
	# gbm = lgb.Booster(model_file=model_path)

	# with st.spinner("Getting the data ready..."):
	# # Load predict data for that cluster
	# predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')

	# # Convert cliente_id to string
	# predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)

	# with st.spinner("Filtering data..."):

	# # Filter for the specific customer
	# customer_code_str = str(customer_code)
	# customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]

	# with st.spinner("Generating sales predictions..."):

	# if not customer_data.empty:
	# # Define features consistently with the training process
	# lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
	# features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']

	# # Prepare data for prediction
	# X_predict = customer_data[features]

	# # Convert categorical features to 'category' dtype
	# categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
	# for feature in categorical_features:
	# X_predict[feature] = X_predict[feature].astype('category')

	# # Make Prediction for the selected customer
	# y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)

	# # Reassemble the results
	# results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
	# results['ventas_predichas'] = y_pred

	# # Load actual data
	# actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]

	# if not actual_sales.empty:
	# results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
	# on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
	# how='left')
	# results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
	# results['ventas_reales'].fillna(0, inplace=True)
	# # st.write("### Final Results DataFrame:")
	# # st.write(results.head())
	# # st.write(f"Shape: {results.shape}")

	# # Calculate metrics only for non-null actual sales
	# valid_results = results.dropna(subset=['ventas_reales'])
	# non_zero_actuals = valid_results[valid_results['ventas_reales'] != 0]
	# if not valid_results.empty:
	# mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
	# mape = np.mean(np.abs((non_zero_actuals['ventas_reales'] - non_zero_actuals['ventas_predichas']) / non_zero_actuals['ventas_reales'])) * 100
	# rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))

	# # st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
	# # st.write(f"MAE: {mae:.2f}€")
	# # st.write(f"MAPE: {mape:.2f}%")
	# # st.write(f"RMSE: {rmse:.2f}")

	# # # Analysis of results
	# # threshold_good = 100 # You may want to adjust this threshold
	# # if mae < threshold_good:
	# # st.success(f"Customer {customer_code} is performing well based on the predictions.")
	# # else:
	# # st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
	# # else:
	# # st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")

	# # st.write("### Debug Information for Radar Chart:")
	# # st.write(f"Shape of customer_data: {customer_data.shape}")
	# # st.write(f"Shape of euros_proveedor: {euros_proveedor.shape}")

	# # Get percentage of units sold for each manufacturer
	# customer_df = df[df["CLIENTE"] == str(customer_code)] # Get the customer data
	# all_manufacturers = customer_df.iloc[:, 1:].T # Exclude CLIENTE column (manufacturers are in columns)
	# all_manufacturers.index = all_manufacturers.index.astype(str)

	# # Get total sales for each manufacturer from euros_proveedor
	# customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
	# sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
	# sales_data.index = sales_data.index.astype(str)

	# # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
	# sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')

	# # Ensure all values are numeric
	# sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
	# all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')

	# # Sort manufacturers by percentage of units and get top 10
	# top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)

	# # Sort manufacturers by total sales and get top 10
	# top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)

	# # Combine top manufacturers from both lists and get up to 20 unique manufacturers
	# combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]

	# # Filter out manufacturers that are not present in both datasets
	# combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]

	# # st.write(f"Number of combined top manufacturers: {len(combined_top)}")

	# if combined_top:
	# # Create a DataFrame with combined data for these top manufacturers
	# combined_data = pd.DataFrame({
	# 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
	# 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
	# }).fillna(0)

	# # Sort by units, then by sales
	# combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)

	# # Filter out manufacturers with 0 units
	# non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]

	# # If we have less than 3 non-zero manufacturers, add some zero-value ones
	# if len(non_zero_manufacturers) < 3:
	# zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
	# manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
	# else:
	# manufacturers_to_show = non_zero_manufacturers

	# values = manufacturers_to_show['units'].tolist()
	# amounts = manufacturers_to_show['sales'].tolist()
	# manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]

	# # st.write(f"### Results for top {len(manufacturers)} manufacturers:")
	# # for manufacturer, value, amount in zip(manufacturers, values, amounts):
	# # (f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")

	# if manufacturers: # Only create the chart if we have data
	# fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
	# st.pyplot(fig)
	# else:
	# st.warning("No data available to create the radar chart.")

	# else:
	# st.warning("No combined top manufacturers found.")

	# # Ensure codigo_cliente in ventas_clientes is a string
	# ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()

	# # Ensure customer_code is a string and strip any spaces
	# customer_code = str(customer_code).strip()

	# # if customer_code in ventas_clientes['codigo_cliente'].unique():
	# # (f"Customer {customer_code} found in ventas_clientes")
	# # else:
	# # (f"Customer {customer_code} not found in ventas_clientes")

	# # Customer sales 2021-2024 (if data exists)
	# sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
	# if all(col in ventas_clientes.columns for col in sales_columns):
	# customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]

	# if not customer_sales_data.empty:
	# customer_sales = customer_sales_data[sales_columns].values[0]
	# years = ['2021', '2022', '2023']

	# # Add the 2024 actual and predicted data
	# if 'ventas_predichas' in results.columns and 'ventas_reales' in results.columns:
	# # Get the actual and predicted sales for 2024
	# actual_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_reales'].sum()
	# predicted_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_predichas'].sum()

	# # Estimate full-year predicted sales (assuming predictions available until September)
	# months_available = 9 # Data available until September
	# actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12

	# # Add 2024 actual and predicted sales
	# sales_values = list(customer_sales) + [actual_sales_2024_annual] # Actual sales
	# predicted_values = list(customer_sales) + [predicted_sales_2024] # Predicted sales

	# # Add 2024 to the years list
	# years.append('2024')

	# fig_sales_bar = go.Figure()
	# # Add trace for historical sales (2021-2023)
	# fig_sales_bar.add_trace(go.Bar(
	# x=years[:3], # 2021, 2022, 2023
	# y=sales_values[:3],
	# name="Historical Sales",
	# marker_color='blue'
	# ))

	# # Add trace for 2024 actual sales
	# fig_sales_bar.add_trace(go.Bar(
	# x=[years[3]], # 2024
	# y=[sales_values[3]],
	# name="2024 Actual Sales (Annualized)",
	# marker_color='green'
	# ))

	# # Add trace for 2024 predicted sales
	# fig_sales_bar.add_trace(go.Bar(
	# x=[years[3]], # 2024
	# y=[predicted_values[3]],
	# name="2024 Predicted Sales",
	# marker_color='orange'
	# ))

	# # Update layout
	# fig_sales_bar.update_layout(
	# title=f"Sales Over the Years for Customer {customer_code}",
	# xaxis_title="Year",
	# yaxis_title="Sales (€)",
	# barmode='group',
	# height=600,
	# legend_title_text="Sales Type",
	# hovermode="x unified"
	# )

	# # Show the interactive bar chart in Streamlit
	# st.plotly_chart(fig_sales_bar, use_container_width=True)

	# else:
	# st.warning(f"No predicted or actual data found for customer {customer_code} for 2024.")

	# else:
	# st.warning(f"No historical sales data found for customer {customer_code}")

	# else:
	# st.warning("Sales data for 2021-2023 not available in the dataset.")


	# Customer Recommendations Page
	elif page == "Articles Recommendations":
	st.title("Articles Recommendations")

	st.markdown("""
	Get tailored recommendations for your customers based on their basket.
	""")

	st.write("Select items and assign quantities for the basket:")

	# Mostrar lista de artículos disponibles
	available_articles = productos['ARTICULO'].unique()
	selected_articles = st.multiselect("Select Articles", available_articles)

	# Crear inputs para ingresar las cantidades de cada artículo seleccionado
	quantities = {}
	for article in selected_articles:
	quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)

	if st.button("Calcular"): # Añadimos el botón "Calcular"
	# Crear una lista de artículos basada en la selección
	new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]

	if new_basket:
	# Procesar la lista para recomendar
	recommendations_df = recomienda_tfid(new_basket)

	if not recommendations_df.empty:
	st.write("### Recommendations based on the current basket:")
	st.dataframe(recommendations_df)
	else:
	st.warning("No recommendations found for the provided basket.")
	else:
	st.warning("Please select at least one article and set its quantity.")

	# # Customer Recommendations Page
	# elif page == "Articles Recommendations":
	# st.title("Articles Recommendations")

	# st.markdown("""
	# Get tailored recommendations for your customers based on their basket.
	# """)

	# # Campo input para cliente
	# partial_code = st.text_input("Enter part of Customer Code for Recommendations (or leave empty to see all)")
	# if partial_code:
	# filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
	# else:
	# filtered_customers = df
	# customer_list = filtered_customers['CLIENTE'].unique()
	# customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))

	# # Definición de la función recomienda
	# def recomienda(new_basket):
	# # Calcular la matriz TF-IDF
	# tfidf = TfidfVectorizer()
	# tfidf_matrix = tfidf.fit_transform(cestas['Cestas'])

	# # Convertir la nueva cesta en formato TF-IDF
	# new_basket_str = ' '.join(new_basket)
	# new_basket_tfidf = tfidf.transform([new_basket_str])

	# # Comparar la nueva cesta con las anteriores
	# similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)

	# # Obtener los índices de las cestas más similares
	# similar_indices = similarities.argsort()[0][-3:] # Las 3 más similares

	# # Crear un diccionario para contar las recomendaciones
	# recommendations_count = {}
	# total_similarity = 0

	# # Recomendar productos de cestas similares
	# for idx in similar_indices:
	# sim_score = similarities[0][idx]
	# total_similarity += sim_score
	# products = cestas.iloc[idx]['Cestas'].split()

	# for product in products:
	# if product.strip() not in new_basket: # Evitar recomendar lo que ya está en la cesta
	# if product.strip() in recommendations_count:
	# recommendations_count[product.strip()] += sim_score
	# else:
	# recommendations_count[product.strip()] = sim_score

	# # Calcular la probabilidad relativa de cada producto recomendado
	# recommendations_with_prob = []
	# if total_similarity > 0: # Verificar que total_similarity no sea cero
	# recommendations_with_prob = [(product, score / total_similarity) for product, score in recommendations_count.items()]
	# else:
	# print("No se encontraron similitudes suficientes para calcular probabilidades.")

	# recommendations_with_prob.sort(key=lambda x: x[1], reverse=True) # Ordenar por puntuación

	# # Crear un nuevo DataFrame para almacenar las recomendaciones con descripciones y probabilidades
	# recommendations_df = pd.DataFrame(columns=['ARTICULO', 'DESCRIPCION', 'PROBABILIDAD'])

	# # Agregar las recomendaciones al DataFrame usando pd.concat
	# for product, prob in recommendations_with_prob:
	# # Buscar la descripción en el DataFrame de productos
	# description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
	# if not description.empty:
	# # Crear un nuevo DataFrame temporal para la recomendación
	# temp_df = pd.DataFrame({
	# 'ARTICULO': [product],
	# 'DESCRIPCION': [description.values[0]], # Obtener el primer valor encontrado
	# 'PROBABILIDAD': [prob]
	# })
	# # Concatenar el DataFrame temporal al DataFrame de recomendaciones
	# recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)

	# return recommendations_df

	# # Comprobar si el cliente está en el CSV de fieles
	# is_fiel = customer_code in fieles_df['Cliente'].astype(str).values

	# if customer_code:
	# if is_fiel:
	# st.write(f"### Customer {customer_code} is a loyal customer.")
	# option = st.selectbox("Select Recommendation Type", ["Select an option", "By Purchase History", "By Current Basket"])

	# if option == "By Purchase History":
	# st.warning("Option not available... aún")
	# elif option == "By Current Basket":
	# st.write("Select the items and assign quantities for the basket:")

	# # Mostrar lista de artículos disponibles
	# available_articles = productos['ARTICULO'].unique()
	# selected_articles = st.multiselect("Select Articles", available_articles)

	# # Crear inputs para ingresar las cantidades de cada artículo seleccionado
	# quantities = {}
	# for article in selected_articles:
	# quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)

	# if st.button("Calcular"): # Añadimos el botón "Calcular"
	# # Crear una lista de artículos basada en la selección
	# new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]

	# if new_basket:
	# # Procesar la lista para recomendar
	# recommendations_df = recomienda(new_basket)

	# if not recommendations_df.empty:
	# st.write("### Recommendations based on the current basket:")
	# st.dataframe(recommendations_df)
	# else:
	# st.warning("No recommendations found for the provided basket.")
	# else:
	# st.warning("Please select at least one article and set its quantity.")
	# else:
	# st.write(f"### Customer {customer_code} is not a loyal customer.")
	# st.write("Select items and assign quantities for the basket:")

	# # Mostrar lista de artículos disponibles
	# available_articles = productos['ARTICULO'].unique()
	# selected_articles = st.multiselect("Select Articles", available_articles)

	# # Crear inputs para ingresar las cantidades de cada artículo seleccionado
	# quantities = {}
	# for article in selected_articles:
	# quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)

	# if st.button("Calcular"): # Añadimos el botón "Calcular"
	# # Crear una lista de artículos basada en la selección
	# new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]

	# if new_basket:
	# # Procesar la lista para recomendar
	# recommendations_df = recomienda(new_basket)

	# if not recommendations_df.empty:
	# st.write("### Recommendations based on the current basket:")
	# st.dataframe(recommendations_df)
	# else:
	# st.warning("No recommendations found for the provided basket.")
	# else:
	# st.warning("Please select at least one article and set its quantity.")