committed on
feat: updated
Browse files
@@ -6,16 +6,18 @@ import numpy as np
6 |
import lightgbm as lgb
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
8 |
from sklearn.metrics.pairwise import cosine_similarity
9 |
10 |
# Page configuration
11 |
st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
12 |
13 |
# Load CSV files at the top
14 |
df = pd.read_csv("df_clean.csv")
15 |
nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
16 |
euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
17 |
ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
18 |
customer_clusters = pd.read_csv('predicts/customer_clusters.csv') # Load the customer clusters here
19 |
20 |
# Ensure customer codes are strings
21 |
df['CLIENTE'] = df['CLIENTE'].astype(str)
@@ -25,6 +27,7 @@ customer_clusters['cliente_id'] = customer_clusters['cliente_id'].astype(str) #
25 |
fieles_df = pd.read_csv("clientes_relevantes.csv")
26 |
cestas = pd.read_csv("cestas.csv")
27 |
productos = pd.read_csv("productos.csv")
28 |
29 |
# Convert all columns except 'CLIENTE' to float in euros_proveedor
30 |
for col in euros_proveedor.columns:
@@ -103,144 +106,6 @@ if page == "":
103 |
st.markdown("## Welcome to the Customer Insights App")
104 |
st.write("Use the dropdown menu to navigate between the different sections.")
105 |
106 |
# Customer Analysis Page
107 |
# elif page == "Customer Analysis":
108 |
# st.title("Customer Analysis")
109 |
# st.markdown("Use the tools below to explore your customer data.")
110 |
111 |
# partial_code = st.text_input("Enter part of Customer Code (or leave empty to see all)")
112 |
# if partial_code:
113 |
# filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
114 |
# else:
115 |
# filtered_customers = df
116 |
# customer_list = filtered_customers['CLIENTE'].unique()
117 |
# customer_code = st.selectbox("Select Customer Code", customer_list)
118 |
119 |
# if st.button("Calcular"):
120 |
# if customer_code:
121 |
# # Find Customer's Cluster
122 |
# customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
123 |
124 |
# if not customer_match.empty:
125 |
# cluster = customer_match['cluster_id'].values[0]
126 |
# st.write(f"Customer {customer_code} belongs to cluster {cluster}")
127 |
128 |
# # Load the Corresponding Model
129 |
# model_path = f'models/modelo_cluster_{cluster}.txt'
130 |
# gbm = lgb.Booster(model_file=model_path)
131 |
# st.write(f"Loaded model for cluster {cluster}")
132 |
133 |
# # Load X_predict for that cluster
134 |
# X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
135 |
136 |
# # Filter for the specific customer
137 |
# X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
138 |
139 |
# if not X_cliente.empty:
140 |
# # Prepare data for prediction
141 |
# features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
142 |
143 |
# # Make Prediction for the selected customer
144 |
# y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
145 |
146 |
# # Reassemble the results
147 |
# results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
148 |
# results['ventas_predichas'] = y_pred
149 |
150 |
# st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
151 |
152 |
# # Load actual data
153 |
# df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
154 |
# actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
155 |
156 |
# if not actual_sales.empty:
157 |
# results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
158 |
# on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
159 |
# how='left')
160 |
# results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
161 |
162 |
# # Calculate metrics only for non-null actual sales
163 |
# valid_results = results.dropna(subset=['ventas_reales'])
164 |
# if not valid_results.empty:
165 |
# mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
166 |
# mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
167 |
# rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
168 |
169 |
# st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
170 |
# st.write(f"MAE: {mae:.2f}")
171 |
# st.write(f"MAPE: {mape:.2f}%")
172 |
# st.write(f"RMSE: {rmse:.2f}")
173 |
174 |
# # Analysis of results
175 |
# threshold_good = 100 # You may want to adjust this threshold
176 |
# if mae < threshold_good:
177 |
# st.success(f"Customer {customer_code} is performing well based on the predictions.")
178 |
# else:
179 |
# st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
180 |
# else:
181 |
# st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
182 |
183 |
# # Show the radar chart
184 |
# all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
185 |
# all_manufacturers.index = all_manufacturers.index.astype(str)
186 |
187 |
# sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
188 |
# sales_data.index = sales_data.index.astype(str)
189 |
190 |
# sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
191 |
# sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
192 |
193 |
# top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
194 |
# top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
195 |
# combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
196 |
# combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
197 |
198 |
# combined_data = pd.DataFrame({
199 |
# 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
200 |
# 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
201 |
# }).fillna(0)
202 |
203 |
# combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
204 |
# non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
205 |
206 |
# if len(non_zero_manufacturers) < 3:
207 |
# zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
208 |
# manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
209 |
# else:
210 |
# manufacturers_to_show = non_zero_manufacturers
211 |
212 |
# values = manufacturers_to_show['units'].tolist()
213 |
# amounts = manufacturers_to_show['sales'].tolist()
214 |
# manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
215 |
216 |
# st.write(f"### Results for top {len(manufacturers)} manufacturers:")
217 |
# for manufacturer, value, amount in zip(manufacturers, values, amounts):
218 |
# st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
219 |
220 |
# if manufacturers:
221 |
# fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
222 |
# st.pyplot(fig)
223 |
# else:
224 |
# st.warning("No data available to create the radar chart.")
225 |
226 |
# # Show sales over the years graph
227 |
# sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
228 |
# if all(col in ventas_clientes.columns for col in sales_columns):
229 |
# years = ['2021', '2022', '2023']
230 |
# customer_sales = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code][sales_columns].values[0]
231 |
232 |
# fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
233 |
# fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
234 |
# st.plotly_chart(fig_sales)
235 |
# else:
236 |
# st.warning("Sales data for 2021-2023 not available.")
237 |
# else:
238 |
# st.warning(f"No prediction data found for customer {customer_code}.")
239 |
# else:
240 |
# st.warning(f"No data found for customer {customer_code}. Please check the code.")
241 |
# else:
242 |
# st.warning("Please select a customer.")
243 |
244 |
elif page == "Customer Analysis":
245 |
st.title("Customer Analysis")
246 |
st.markdown("Use the tools below to explore your customer data.")
@@ -333,8 +198,7 @@ elif page == "Customer Analysis":
333 |
st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
334 |
335 |
# Load actual data
336 |
337 |
actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
338 |
st.write("### Actual Sales DataFrame:")
339 |
340 |
st.write(f"Shape: {actual_sales.shape}")
@@ -574,3 +438,142 @@ elif page == "Articles Recommendations":
574 |
st.warning("No recommendations found for the provided basket.")
575 |
576 |
st.warning("Please select at least one article and set its quantity.")
6 |
import lightgbm as lgb
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
8 |
from sklearn.metrics.pairwise import cosine_similarity
9 |
from sklearn.metrics import mean_absolute_error, mean_squared_error
10 |
11 |
# Page configuration
12 |
st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
13 |
14 |
# Load CSV files at the top
15 |
df = pd.read_csv("df_clean.csv")
16 |
nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
17 |
euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
18 |
ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
19 |
customer_clusters = pd.read_csv('predicts/customer_clusters.csv') # Load the customer clusters here
20 |
df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
21 |
22 |
# Ensure customer codes are strings
23 |
df['CLIENTE'] = df['CLIENTE'].astype(str)
27 |
fieles_df = pd.read_csv("clientes_relevantes.csv")
28 |
cestas = pd.read_csv("cestas.csv")
29 |
productos = pd.read_csv("productos.csv")
30 |
df_agg_2024['cliente_id'] = df_agg_2024['cliente_id'].astype(str)
31 |
32 |
# Convert all columns except 'CLIENTE' to float in euros_proveedor
33 |
for col in euros_proveedor.columns:
106 |
st.markdown("## Welcome to the Customer Insights App")
107 |
st.write("Use the dropdown menu to navigate between the different sections.")
108 |
109 |
elif page == "Customer Analysis":
110 |
st.title("Customer Analysis")
111 |
st.markdown("Use the tools below to explore your customer data.")
198 |
st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
199 |
200 |
# Load actual data
201 |
actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
202 |
st.write("### Actual Sales DataFrame:")
203 |
204 |
st.write(f"Shape: {actual_sales.shape}")
438 |
st.warning("No recommendations found for the provided basket.")
439 |
440 |
st.warning("Please select at least one article and set its quantity.")
441 |
442 |
443 |
# Customer Analysis Page
444 |
# elif page == "Customer Analysis":
445 |
# st.title("Customer Analysis")
446 |
# st.markdown("Use the tools below to explore your customer data.")
447 |
448 |
# partial_code = st.text_input("Enter part of Customer Code (or leave empty to see all)")
449 |
# if partial_code:
450 |
# filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
451 |
# else:
452 |
# filtered_customers = df
453 |
# customer_list = filtered_customers['CLIENTE'].unique()
454 |
# customer_code = st.selectbox("Select Customer Code", customer_list)
455 |
456 |
# if st.button("Calcular"):
457 |
# if customer_code:
458 |
# # Find Customer's Cluster
459 |
# customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
460 |
461 |
# if not customer_match.empty:
462 |
# cluster = customer_match['cluster_id'].values[0]
463 |
# st.write(f"Customer {customer_code} belongs to cluster {cluster}")
464 |
465 |
# # Load the Corresponding Model
466 |
# model_path = f'models/modelo_cluster_{cluster}.txt'
467 |
# gbm = lgb.Booster(model_file=model_path)
468 |
# st.write(f"Loaded model for cluster {cluster}")
469 |
470 |
# # Load X_predict for that cluster
471 |
# X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
472 |
473 |
# # Filter for the specific customer
474 |
# X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
475 |
476 |
# if not X_cliente.empty:
477 |
# # Prepare data for prediction
478 |
# features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
479 |
480 |
# # Make Prediction for the selected customer
481 |
# y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
482 |
483 |
# # Reassemble the results
484 |
# results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
485 |
# results['ventas_predichas'] = y_pred
486 |
487 |
# st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
488 |
489 |
# # Load actual data
490 |
# df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
491 |
# actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
492 |
493 |
# if not actual_sales.empty:
494 |
# results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
495 |
# on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
496 |
# how='left')
497 |
# results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
498 |
499 |
# # Calculate metrics only for non-null actual sales
500 |
# valid_results = results.dropna(subset=['ventas_reales'])
501 |
# if not valid_results.empty:
502 |
# mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
503 |
# mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
504 |
# rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
505 |
506 |
# st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
507 |
# st.write(f"MAE: {mae:.2f}")
508 |
# st.write(f"MAPE: {mape:.2f}%")
509 |
# st.write(f"RMSE: {rmse:.2f}")
510 |
511 |
# # Analysis of results
512 |
# threshold_good = 100 # You may want to adjust this threshold
513 |
# if mae < threshold_good:
514 |
# st.success(f"Customer {customer_code} is performing well based on the predictions.")
515 |
# else:
516 |
# st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
517 |
# else:
518 |
# st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
519 |
520 |
# # Show the radar chart
521 |
# all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
522 |
# all_manufacturers.index = all_manufacturers.index.astype(str)
523 |
524 |
# sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
525 |
# sales_data.index = sales_data.index.astype(str)
526 |
527 |
# sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
528 |
# sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
529 |
530 |
# top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
531 |
# top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
532 |
# combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
533 |
# combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
534 |
535 |
# combined_data = pd.DataFrame({
536 |
# 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
537 |
# 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
538 |
# }).fillna(0)
539 |
540 |
# combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
541 |
# non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
542 |
543 |
# if len(non_zero_manufacturers) < 3:
544 |
# zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
545 |
# manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
546 |
# else:
547 |
# manufacturers_to_show = non_zero_manufacturers
548 |
549 |
# values = manufacturers_to_show['units'].tolist()
550 |
# amounts = manufacturers_to_show['sales'].tolist()
551 |
# manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
552 |
553 |
# st.write(f"### Results for top {len(manufacturers)} manufacturers:")
554 |
# for manufacturer, value, amount in zip(manufacturers, values, amounts):
555 |
# st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
556 |
557 |
# if manufacturers:
558 |
# fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
559 |
# st.pyplot(fig)
560 |
# else:
561 |
# st.warning("No data available to create the radar chart.")
562 |
563 |
# # Show sales over the years graph
564 |
# sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
565 |
# if all(col in ventas_clientes.columns for col in sales_columns):
566 |
# years = ['2021', '2022', '2023']
567 |
# customer_sales = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code][sales_columns].values[0]
568 |
569 |
# fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
570 |
# fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
571 |
# st.plotly_chart(fig_sales)
572 |
# else:
573 |
# st.warning("Sales data for 2021-2023 not available.")
574 |
# else:
575 |
# st.warning(f"No prediction data found for customer {customer_code}.")
576 |
# else:
577 |
# st.warning(f"No data found for customer {customer_code}. Please check the code.")
578 |
# else:
579 |
# st.warning("Please select a customer.")