Spaces:
Sleeping
Sleeping
GMARTINEZMILLA
committed on
Commit
•
9db7393
1
Parent(s):
ad618df
feat: updated app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,8 @@ import lightgbm as lgb
|
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
|
|
|
|
10 |
|
11 |
# Page configuration
|
12 |
st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
|
@@ -340,149 +342,219 @@ elif page == "Customer Analysis":
|
|
340 |
st.warning("Please select a customer.")
|
341 |
|
342 |
|
343 |
-
|
344 |
-
|
345 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
|
347 |
-
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
|
355 |
-
else:
|
356 |
-
filtered_customers = df
|
357 |
-
customer_list = filtered_customers['CLIENTE'].unique()
|
358 |
-
customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
|
359 |
-
|
360 |
-
# Definición de la función recomienda
|
361 |
-
def recomienda(new_basket):
|
362 |
-
# Calcular la matriz TF-IDF
|
363 |
-
tfidf = TfidfVectorizer()
|
364 |
-
tfidf_matrix = tfidf.fit_transform(cestas['Cestas'])
|
365 |
-
|
366 |
-
# Convertir la nueva cesta en formato TF-IDF
|
367 |
-
new_basket_str = ' '.join(new_basket)
|
368 |
-
new_basket_tfidf = tfidf.transform([new_basket_str])
|
369 |
-
|
370 |
-
# Comparar la nueva cesta con las anteriores
|
371 |
-
similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
|
372 |
-
|
373 |
-
# Obtener los índices de las cestas más similares
|
374 |
-
similar_indices = similarities.argsort()[0][-3:] # Las 3 más similares
|
375 |
-
|
376 |
-
# Crear un diccionario para contar las recomendaciones
|
377 |
-
recommendations_count = {}
|
378 |
-
total_similarity = 0
|
379 |
-
|
380 |
-
# Recomendar productos de cestas similares
|
381 |
-
for idx in similar_indices:
|
382 |
-
sim_score = similarities[0][idx]
|
383 |
-
total_similarity += sim_score
|
384 |
-
products = cestas.iloc[idx]['Cestas'].split()
|
385 |
-
|
386 |
-
for product in products:
|
387 |
-
if product.strip() not in new_basket: # Evitar recomendar lo que ya está en la cesta
|
388 |
-
if product.strip() in recommendations_count:
|
389 |
-
recommendations_count[product.strip()] += sim_score
|
390 |
-
else:
|
391 |
-
recommendations_count[product.strip()] = sim_score
|
392 |
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
# Crear inputs para ingresar las cantidades de cada artículo seleccionado
|
439 |
-
quantities = {}
|
440 |
-
for article in selected_articles:
|
441 |
-
quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
|
442 |
-
|
443 |
-
if st.button("Calcular"): # Añadimos el botón "Calcular"
|
444 |
-
# Crear una lista de artículos basada en la selección
|
445 |
-
new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
|
446 |
-
|
447 |
-
if new_basket:
|
448 |
-
# Procesar la lista para recomendar
|
449 |
-
recommendations_df = recomienda(new_basket)
|
450 |
-
|
451 |
-
if not recommendations_df.empty:
|
452 |
-
st.write("### Recommendations based on the current basket:")
|
453 |
-
st.dataframe(recommendations_df)
|
454 |
-
else:
|
455 |
-
st.warning("No recommendations found for the provided basket.")
|
456 |
-
else:
|
457 |
-
st.warning("Please select at least one article and set its quantity.")
|
458 |
-
else:
|
459 |
-
st.write(f"### Customer {customer_code} is not a loyal customer.")
|
460 |
-
st.write("Select items and assign quantities for the basket:")
|
461 |
|
462 |
-
|
463 |
-
|
464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
|
470 |
|
471 |
-
|
472 |
-
|
473 |
-
|
|
|
474 |
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
|
|
|
|
|
|
|
|
486 |
|
487 |
|
488 |
# Customer Analysis Page
|
|
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
10 |
+
from joblib import dump, load
|
11 |
+
|
12 |
|
13 |
# Page configuration
|
14 |
st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
|
|
|
342 |
st.warning("Please select a customer.")
|
343 |
|
344 |
|
345 |
+
def recomienda_tfid(new_basket, top_n=4):
    """Recommend articles for a basket based on the most similar stored baskets.

    The basket is vectorised with the persisted TF-IDF model and compared, by
    cosine similarity, against the persisted TF-IDF matrix of stored baskets.
    Every article that appears in the ``top_n`` most similar baskets and is
    not already in ``new_basket`` accumulates the similarity of each basket it
    appears in; the accumulated score is then divided by the total similarity
    of the selected baskets.

    Args:
        new_basket: Iterable of article tokens (strings) in the current basket.
        top_n: How many of the most similar stored baskets to draw
            recommendations from. Defaults to 4, the value that used to be
            hard-coded in the slice.

    Returns:
        A ``pandas.DataFrame`` with the columns ``ARTICULO``, ``DESCRIPCION``
        and ``RELEVANCIA``, ordered by descending relevance. Articles with no
        matching row in the products file are left out. The frame is empty
        when nothing can be recommended.

    Raises:
        ValueError: If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        # A slice of ``[-0:]`` would silently select *every* basket.
        raise ValueError("top_n must be at least 1")

    # Materialise once so the basket can be both joined and used for lookups.
    basket_items = list(new_basket)
    already_in_basket = set(basket_items)

    # NOTE(review): these paths are relative to the process working directory
    # and every artefact is reloaded on each call -- confirm this is intended
    # for the deployment, or cache the loaded objects.
    cestas = pd.read_csv('../data/processed/cestas.csv')
    productos = pd.read_csv('../data/processed/productos.csv')

    # Persisted TF-IDF matrix of the stored baskets and the fitted vectoriser.
    tfidf_matrix = load('../models/tfidf_matrix.joblib')
    tfidf = load('../models/tfidf_model.joblib')

    # Express the new basket in the same TF-IDF space as the stored baskets.
    new_basket_tfidf = tfidf.transform([' '.join(basket_items)])

    # One row of similarities: the new basket against every stored basket.
    scores = cosine_similarity(new_basket_tfidf, tfidf_matrix)[0]

    # argsort is ascending, so the tail holds the ``top_n`` most similar rows.
    similar_indices = scores.argsort()[-top_n:]

    # Similarity-weighted tally of candidate articles.
    recommendations_count = {}
    total_similarity = 0
    for idx in similar_indices:
        sim_score = scores[idx]
        total_similarity += sim_score

        # ``split()`` with no arguments already yields whitespace-free tokens.
        for product in cestas.iloc[idx]['Cestas'].split():
            if product not in already_in_basket:  # never recommend what is already there
                recommendations_count[product] = (
                    recommendations_count.get(product, 0) + sim_score
                )

    result_columns = ['ARTICULO', 'DESCRIPCION', 'RELEVANCIA']

    if not total_similarity > 0:
        # No selected basket shares anything with the new one, so relative
        # scores cannot be computed (and would divide by zero).
        print("No se encontraron similitudes suficientes para calcular probabilidades.")
        return pd.DataFrame(columns=result_columns)

    # Normalise by the total similarity and rank from most to least relevant.
    ranked = sorted(
        (
            (product, score / total_similarity)
            for product, score in recommendations_count.items()
        ),
        key=lambda item: item[1],
        reverse=True,
    )

    # Build the article -> description lookup once instead of scanning the
    # products frame for every candidate; the first row wins on duplicates.
    descriptions = (
        productos.drop_duplicates(subset='ARTICULO')
        .set_index('ARTICULO')['DESCRIPCION']
        .to_dict()
    )

    recommendations_data = [
        {
            'ARTICULO': product,
            'DESCRIPCION': descriptions[product],
            'RELEVANCIA': score,
        }
        for product, score in ranked
        if product in descriptions
    ]

    return pd.DataFrame(recommendations_data, columns=result_columns)
|
414 |
|
415 |
+
# # Customer Recommendations Page
|
416 |
+
# elif page == "Articles Recommendations":
|
417 |
+
# st.title("Articles Recommendations")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
418 |
|
419 |
+
# st.markdown("""
|
420 |
+
# Get tailored recommendations for your customers based on their basket.
|
421 |
+
# """)
|
422 |
+
|
423 |
+
# # Campo input para cliente
|
424 |
+
# partial_code = st.text_input("Enter part of Customer Code for Recommendations (or leave empty to see all)")
|
425 |
+
# if partial_code:
|
426 |
+
# filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
|
427 |
+
# else:
|
428 |
+
# filtered_customers = df
|
429 |
+
# customer_list = filtered_customers['CLIENTE'].unique()
|
430 |
+
# customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
|
431 |
+
|
432 |
+
# # Definición de la función recomienda
|
433 |
+
# def recomienda(new_basket):
|
434 |
+
# # Calcular la matriz TF-IDF
|
435 |
+
# tfidf = TfidfVectorizer()
|
436 |
+
# tfidf_matrix = tfidf.fit_transform(cestas['Cestas'])
|
437 |
+
|
438 |
+
# # Convertir la nueva cesta en formato TF-IDF
|
439 |
+
# new_basket_str = ' '.join(new_basket)
|
440 |
+
# new_basket_tfidf = tfidf.transform([new_basket_str])
|
441 |
+
|
442 |
+
# # Comparar la nueva cesta con las anteriores
|
443 |
+
# similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
|
444 |
+
|
445 |
+
# # Obtener los índices de las cestas más similares
|
446 |
+
# similar_indices = similarities.argsort()[0][-3:] # Las 3 más similares
|
447 |
+
|
448 |
+
# # Crear un diccionario para contar las recomendaciones
|
449 |
+
# recommendations_count = {}
|
450 |
+
# total_similarity = 0
|
451 |
+
|
452 |
+
# # Recomendar productos de cestas similares
|
453 |
+
# for idx in similar_indices:
|
454 |
+
# sim_score = similarities[0][idx]
|
455 |
+
# total_similarity += sim_score
|
456 |
+
# products = cestas.iloc[idx]['Cestas'].split()
|
457 |
+
|
458 |
+
# for product in products:
|
459 |
+
# if product.strip() not in new_basket: # Evitar recomendar lo que ya está en la cesta
|
460 |
+
# if product.strip() in recommendations_count:
|
461 |
+
# recommendations_count[product.strip()] += sim_score
|
462 |
+
# else:
|
463 |
+
# recommendations_count[product.strip()] = sim_score
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
464 |
|
465 |
+
# # Calcular la probabilidad relativa de cada producto recomendado
|
466 |
+
# recommendations_with_prob = []
|
467 |
+
# if total_similarity > 0: # Verificar que total_similarity no sea cero
|
468 |
+
# recommendations_with_prob = [(product, score / total_similarity) for product, score in recommendations_count.items()]
|
469 |
+
# else:
|
470 |
+
# print("No se encontraron similitudes suficientes para calcular probabilidades.")
|
471 |
+
|
472 |
+
# recommendations_with_prob.sort(key=lambda x: x[1], reverse=True) # Ordenar por puntuación
|
473 |
+
|
474 |
+
# # Crear un nuevo DataFrame para almacenar las recomendaciones con descripciones y probabilidades
|
475 |
+
# recommendations_df = pd.DataFrame(columns=['ARTICULO', 'DESCRIPCION', 'PROBABILIDAD'])
|
476 |
+
|
477 |
+
# # Agregar las recomendaciones al DataFrame usando pd.concat
|
478 |
+
# for product, prob in recommendations_with_prob:
|
479 |
+
# # Buscar la descripción en el DataFrame de productos
|
480 |
+
# description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
|
481 |
+
# if not description.empty:
|
482 |
+
# # Crear un nuevo DataFrame temporal para la recomendación
|
483 |
+
# temp_df = pd.DataFrame({
|
484 |
+
# 'ARTICULO': [product],
|
485 |
+
# 'DESCRIPCION': [description.values[0]], # Obtener el primer valor encontrado
|
486 |
+
# 'PROBABILIDAD': [prob]
|
487 |
+
# })
|
488 |
+
# # Concatenar el DataFrame temporal al DataFrame de recomendaciones
|
489 |
+
# recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)
|
490 |
+
|
491 |
+
# return recommendations_df
|
492 |
+
|
493 |
+
# # Comprobar si el cliente está en el CSV de fieles
|
494 |
+
# is_fiel = customer_code in fieles_df['Cliente'].astype(str).values
|
495 |
+
|
496 |
+
# if customer_code:
|
497 |
+
# if is_fiel:
|
498 |
+
# st.write(f"### Customer {customer_code} is a loyal customer.")
|
499 |
+
# option = st.selectbox("Select Recommendation Type", ["Select an option", "By Purchase History", "By Current Basket"])
|
500 |
+
|
501 |
+
# if option == "By Purchase History":
|
502 |
+
# st.warning("Option not available... aún")
|
503 |
+
# elif option == "By Current Basket":
|
504 |
+
# st.write("Select the items and assign quantities for the basket:")
|
505 |
+
|
506 |
+
# # Mostrar lista de artículos disponibles
|
507 |
+
# available_articles = productos['ARTICULO'].unique()
|
508 |
+
# selected_articles = st.multiselect("Select Articles", available_articles)
|
509 |
+
|
510 |
+
# # Crear inputs para ingresar las cantidades de cada artículo seleccionado
|
511 |
+
# quantities = {}
|
512 |
+
# for article in selected_articles:
|
513 |
+
# quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
|
514 |
+
|
515 |
+
# if st.button("Calcular"): # Añadimos el botón "Calcular"
|
516 |
+
# # Crear una lista de artículos basada en la selección
|
517 |
+
# new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
|
518 |
+
|
519 |
+
# if new_basket:
|
520 |
+
# # Procesar la lista para recomendar
|
521 |
+
# recommendations_df = recomienda(new_basket)
|
522 |
+
|
523 |
+
# if not recommendations_df.empty:
|
524 |
+
# st.write("### Recommendations based on the current basket:")
|
525 |
+
# st.dataframe(recommendations_df)
|
526 |
+
# else:
|
527 |
+
# st.warning("No recommendations found for the provided basket.")
|
528 |
+
# else:
|
529 |
+
# st.warning("Please select at least one article and set its quantity.")
|
530 |
+
# else:
|
531 |
+
# st.write(f"### Customer {customer_code} is not a loyal customer.")
|
532 |
+
# st.write("Select items and assign quantities for the basket:")
|
533 |
|
534 |
+
# # Mostrar lista de artículos disponibles
|
535 |
+
# available_articles = productos['ARTICULO'].unique()
|
536 |
+
# selected_articles = st.multiselect("Select Articles", available_articles)
|
|
|
537 |
|
538 |
+
# # Crear inputs para ingresar las cantidades de cada artículo seleccionado
|
539 |
+
# quantities = {}
|
540 |
+
# for article in selected_articles:
|
541 |
+
# quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
|
542 |
|
543 |
+
# if st.button("Calcular"): # Añadimos el botón "Calcular"
|
544 |
+
# # Crear una lista de artículos basada en la selección
|
545 |
+
# new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
|
546 |
|
547 |
+
# if new_basket:
|
548 |
+
# # Procesar la lista para recomendar
|
549 |
+
# recommendations_df = recomienda(new_basket)
|
550 |
+
|
551 |
+
# if not recommendations_df.empty:
|
552 |
+
# st.write("### Recommendations based on the current basket:")
|
553 |
+
# st.dataframe(recommendations_df)
|
554 |
+
# else:
|
555 |
+
# st.warning("No recommendations found for the provided basket.")
|
556 |
+
# else:
|
557 |
+
# st.warning("Please select at least one article and set its quantity.")
|
558 |
|
559 |
|
560 |
# Customer Analysis Page
|