GMARTINEZMILLA committed on
Commit
36df00a
1 Parent(s): 16acf43

bugfix: fixed CLIENTE for Cliente

Browse files
Files changed (1) hide show
  1. app.py +108 -106
app.py CHANGED
@@ -114,85 +114,88 @@ elif page == "Customer Analysis":
114
  customer_list = filtered_customers['CLIENTE'].unique()
115
  customer_code = st.selectbox("Select Customer Code", customer_list)
116
 
117
- if customer_code:
118
- customer_data = df[df["CLIENTE"] == str(customer_code)]
119
- customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
 
120
 
121
- if not customer_data.empty and not customer_euros.empty:
122
- st.write(f"### Analysis for Customer {customer_code}")
123
 
124
- # Get percentage of units sold for each manufacturer
125
- all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
126
- all_manufacturers.index = all_manufacturers.index.astype(str)
127
 
128
- # Get total sales for each manufacturer
129
- sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
130
- sales_data.index = sales_data.index.astype(str)
131
 
132
- # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
133
- sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
134
 
135
- # Ensure all values are numeric
136
- sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
137
 
138
- # Sort manufacturers by percentage of units and get top 10
139
- top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
140
 
141
- # Sort manufacturers by total sales and get top 10
142
- top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
143
 
144
- # Combine top manufacturers from both lists and get up to 20 unique manufacturers
145
- combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
146
 
147
- # Filter out manufacturers that are not present in both datasets
148
- combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
149
 
150
- # Create a DataFrame with combined data for these top manufacturers
151
- combined_data = pd.DataFrame({
152
- 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
153
- 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
154
- }).fillna(0)
155
 
156
- # Sort by units, then by sales
157
- combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
158
 
159
- # Filter out manufacturers with 0 units
160
- non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
161
 
162
- # If we have less than 3 non-zero manufacturers, add some zero-value ones
163
- if len(non_zero_manufacturers) < 3:
164
- zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
165
- manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
166
- else:
167
- manufacturers_to_show = non_zero_manufacturers
168
 
169
- values = manufacturers_to_show['units'].tolist()
170
- amounts = manufacturers_to_show['sales'].tolist()
171
- manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
172
 
173
- st.write(f"### Results for top {len(manufacturers)} manufacturers:")
174
- for manufacturer, value, amount in zip(manufacturers, values, amounts):
175
- st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
176
 
177
- if manufacturers: # Only create the chart if we have data
178
- fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
179
- st.pyplot(fig)
180
- else:
181
- st.warning("No data available to create the radar chart.")
182
 
183
- # Customer sales 2021-2024 (if data exists)
184
- sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023', 'VENTA_2024']
185
- if all(col in df.columns for col in sales_columns):
186
- years = ['2021', '2022', '2023', '2024']
187
- customer_sales = customer_data[sales_columns].values[0]
188
 
189
- fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
190
- fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
191
- st.plotly_chart(fig_sales)
 
 
192
  else:
193
- st.warning("Sales data for 2021-2024 not available.")
194
  else:
195
- st.warning(f"No data found for customer {customer_code}. Please check the code.")
196
 
197
  # Customer Recommendations Page
198
  elif page == "Articles Recommendations":
@@ -209,9 +212,9 @@ elif page == "Articles Recommendations":
209
  else:
210
  filtered_customers = df
211
  customer_list = filtered_customers['CLIENTE'].unique()
212
- customer_code = st.selectbox("Select Customer Code for Recommendations", customer_list)
213
 
214
- # DEfinicion de la funcion recomienda
215
  def recomienda(new_basket):
216
  # Calcular la matriz TF-IDF
217
  tfidf = TfidfVectorizer()
@@ -224,7 +227,6 @@ elif page == "Articles Recommendations":
224
  # Comparar la nueva cesta con las anteriores
225
  similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
226
 
227
-
228
  # Obtener los índices de las cestas más similares
229
  similar_indices = similarities.argsort()[0][-3:] # Las 3 más similares
230
 
@@ -237,13 +239,13 @@ elif page == "Articles Recommendations":
237
  sim_score = similarities[0][idx]
238
  total_similarity += sim_score
239
  products = cestas.iloc[idx]['Cestas'].split()
240
-
241
- for product in products:
242
- if product.strip() not in new_basket: # Evitar recomendar lo que ya está en la cesta
243
- if product.strip() in recommendations_count:
244
- recommendations_count[product.strip()] += sim_score
245
- else:
246
- recommendations_count[product.strip()] = sim_score
247
 
248
  # Calcular la probabilidad relativa de cada producto recomendado
249
  recommendations_with_prob = []
@@ -259,22 +261,21 @@ elif page == "Articles Recommendations":
259
 
260
  # Agregar las recomendaciones al DataFrame usando pd.concat
261
  for product, prob in recommendations_with_prob:
262
- # Buscar la descripción en el DataFrame de productos
263
  description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
264
  if not description.empty:
265
- # Crear un nuevo DataFrame temporal para la recomendación
266
- temp_df = pd.DataFrame({
267
- 'ARTICULO': [product],
268
- 'DESCRIPCION': [description.values[0]], # Obtener el primer valor encontrado
269
- 'PROBABILIDAD': [prob]
270
- })
271
- # Concatenar el DataFrame temporal al DataFrame de recomendaciones
272
- recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)
273
 
274
- return recommendations_df
275
 
276
  # Comprobar si el cliente está en el CSV de fieles
277
-
278
  is_fiel = customer_code in fieles_df['Cliente'].astype(str).values
279
 
280
  if customer_code:
@@ -284,14 +285,38 @@ elif page == "Articles Recommendations":
284
 
285
  if option == "By Purchase History":
286
  st.warning("Option not available... aún")
287
- elif option == "By Current Basket":
288
-
289
  st.write("Enter the items in the basket:")
290
 
291
  # Input para los artículos y unidades
292
  items = st.text_input("Enter items (comma-separated):").split(',')
293
  quantities = st.text_input("Enter quantities (comma-separated):").split(',')
294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  # Crear una lista de artículos basada en la entrada
296
  new_basket = [item.strip() for item in items]
297
 
@@ -306,27 +331,4 @@ elif page == "Articles Recommendations":
306
  else:
307
  st.warning("No recommendations found for the provided basket.")
308
  else:
309
- st.warning("The number of items must match the number of quantities.")
310
- else:
311
- st.write(f"### Customer {customer_code} is not a loyal customer.")
312
- st.write("Recommendation based on the basket. Please enter the items:")
313
-
314
- # Input para los artículos y unidades
315
- items = st.text_input("Enter items (comma-separated):").split(',')
316
- quantities = st.text_input("Enter quantities (comma-separated):").split(',')
317
-
318
- # Crear una lista de artículos basada en la entrada
319
- new_basket = [item.strip() for item in items]
320
-
321
- # Asegurarse de que las longitudes de artículos y cantidades coincidan
322
- if len(new_basket) == len(quantities):
323
- # Procesar la lista para recomendar
324
- recommendations_df = recomienda(new_basket)
325
-
326
- if not recommendations_df.empty:
327
- st.write("### Recommendations based on the current basket:")
328
- st.dataframe(recommendations_df)
329
- else:
330
- st.warning("No recommendations found for the provided basket.")
331
- else:
332
- st.warning("The number of items must match the number of quantities.")
 
114
  customer_list = filtered_customers['CLIENTE'].unique()
115
  customer_code = st.selectbox("Select Customer Code", customer_list)
116
 
117
+ if st.button("Calcular"):
118
+ if customer_code:
119
+ customer_data = df[df["CLIENTE"] == str(customer_code)]
120
+ customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
121
 
122
+ if not customer_data.empty and not customer_euros.empty:
123
+ st.write(f"### Analysis for Customer {customer_code}")
124
 
125
+ # Get percentage of units sold for each manufacturer
126
+ all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
127
+ all_manufacturers.index = all_manufacturers.index.astype(str)
128
 
129
+ # Get total sales for each manufacturer
130
+ sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
131
+ sales_data.index = sales_data.index.astype(str)
132
 
133
+ # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
134
+ sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
135
 
136
+ # Ensure all values are numeric
137
+ sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
138
 
139
+ # Sort manufacturers by percentage of units and get top 10
140
+ top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
141
 
142
+ # Sort manufacturers by total sales and get top 10
143
+ top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
144
 
145
+ # Combine top manufacturers from both lists and get up to 20 unique manufacturers
146
+ combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
147
 
148
+ # Filter out manufacturers that are not present in both datasets
149
+ combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
150
 
151
+ # Create a DataFrame with combined data for these top manufacturers
152
+ combined_data = pd.DataFrame({
153
+ 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
154
+ 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
155
+ }).fillna(0)
156
 
157
+ # Sort by units, then by sales
158
+ combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
159
 
160
+ # Filter out manufacturers with 0 units
161
+ non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
162
 
163
+ # If we have less than 3 non-zero manufacturers, add some zero-value ones
164
+ if len(non_zero_manufacturers) < 3:
165
+ zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
166
+ manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
167
+ else:
168
+ manufacturers_to_show = non_zero_manufacturers
169
 
170
+ values = manufacturers_to_show['units'].tolist()
171
+ amounts = manufacturers_to_show['sales'].tolist()
172
+ manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
173
 
174
+ st.write(f"### Results for top {len(manufacturers)} manufacturers:")
175
+ for manufacturer, value, amount in zip(manufacturers, values, amounts):
176
+ st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
177
 
178
+ if manufacturers: # Only create the chart if we have data
179
+ fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
180
+ st.pyplot(fig)
181
+ else:
182
+ st.warning("No data available to create the radar chart.")
183
 
184
+ # Customer sales 2021-2024 (if data exists)
185
+ sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023', 'VENTA_2024']
186
+ if all(col in df.columns for col in sales_columns):
187
+ years = ['2021', '2022', '2023', '2024']
188
+ customer_sales = customer_data[sales_columns].values[0]
189
 
190
+ fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
191
+ fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
192
+ st.plotly_chart(fig_sales)
193
+ else:
194
+ st.warning("Sales data for 2021-2024 not available.")
195
  else:
196
+ st.warning(f"No data found for customer {customer_code}. Please check the code.")
197
  else:
198
+ st.warning("Please select a customer.")
199
 
200
  # Customer Recommendations Page
201
  elif page == "Articles Recommendations":
 
212
  else:
213
  filtered_customers = df
214
  customer_list = filtered_customers['CLIENTE'].unique()
215
+ customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
216
 
217
+ # Definición de la función recomienda
218
  def recomienda(new_basket):
219
  # Calcular la matriz TF-IDF
220
  tfidf = TfidfVectorizer()
 
227
  # Comparar la nueva cesta con las anteriores
228
  similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
229
 
 
230
  # Obtener los índices de las cestas más similares
231
  similar_indices = similarities.argsort()[0][-3:] # Las 3 más similares
232
 
 
239
  sim_score = similarities[0][idx]
240
  total_similarity += sim_score
241
  products = cestas.iloc[idx]['Cestas'].split()
242
+
243
+ for product in products:
244
+ if product.strip() not in new_basket: # Evitar recomendar lo que ya está en la cesta
245
+ if product.strip() in recommendations_count:
246
+ recommendations_count[product.strip()] += sim_score
247
+ else:
248
+ recommendations_count[product.strip()] = sim_score
249
 
250
  # Calcular la probabilidad relativa de cada producto recomendado
251
  recommendations_with_prob = []
 
261
 
262
  # Agregar las recomendaciones al DataFrame usando pd.concat
263
  for product, prob in recommendations_with_prob:
264
+ # Buscar la descripción en el DataFrame de productos
265
  description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
266
  if not description.empty:
267
+ # Crear un nuevo DataFrame temporal para la recomendación
268
+ temp_df = pd.DataFrame({
269
+ 'ARTICULO': [product],
270
+ 'DESCRIPCION': [description.values[0]], # Obtener el primer valor encontrado
271
+ 'PROBABILIDAD': [prob]
272
+ })
273
+ # Concatenar el DataFrame temporal al DataFrame de recomendaciones
274
+ recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)
275
 
276
+ return recommendations_df
277
 
278
  # Comprobar si el cliente está en el CSV de fieles
 
279
  is_fiel = customer_code in fieles_df['Cliente'].astype(str).values
280
 
281
  if customer_code:
 
285
 
286
  if option == "By Purchase History":
287
  st.warning("Option not available... aún")
288
+ elif option == "By Current Basket":
 
289
  st.write("Enter the items in the basket:")
290
 
291
  # Input para los artículos y unidades
292
  items = st.text_input("Enter items (comma-separated):").split(',')
293
  quantities = st.text_input("Enter quantities (comma-separated):").split(',')
294
 
295
+ if st.button("Calcular"): # Añadimos el botón "Calcular"
296
+ # Crear una lista de artículos basada en la entrada
297
+ new_basket = [item.strip() for item in items]
298
+
299
+ # Asegurarse de que las longitudes de artículos y cantidades coincidan
300
+ if len(new_basket) == len(quantities):
301
+ # Procesar la lista para recomendar
302
+ recommendations_df = recomienda(new_basket)
303
+
304
+ if not recommendations_df.empty:
305
+ st.write("### Recommendations based on the current basket:")
306
+ st.dataframe(recommendations_df)
307
+ else:
308
+ st.warning("No recommendations found for the provided basket.")
309
+ else:
310
+ st.warning("The number of items must match the number of quantities.")
311
+ else:
312
+ st.write(f"### Customer {customer_code} is not a loyal customer.")
313
+ st.write("Recommendation based on the basket. Please enter the items:")
314
+
315
+ # Input para los artículos y unidades
316
+ items = st.text_input("Enter items (comma-separated):").split(',')
317
+ quantities = st.text_input("Enter quantities (comma-separated):").split(',')
318
+
319
+ if st.button("Calcular"): # Añadimos el botón "Calcular"
320
  # Crear una lista de artículos basada en la entrada
321
  new_basket = [item.strip() for item in items]
322
 
 
331
  else:
332
  st.warning("No recommendations found for the provided basket.")
333
  else:
334
+ st.warning("The number of items must match the number of quantities.")