Spaces:

GMARTINEZMILLA
/

Final_Project

Sleeping

App Files Community

GMARTINEZMILLA committed on Oct 13

Commit

36df00a

•

1 Parent(s): 16acf43

bugfix: fixed CLIENTE for Cliente

Browse files

Files changed (1) hide show

app.py +108 -106

app.py CHANGED Viewed

@@ -114,85 +114,88 @@ elif page == "Customer Analysis":
     customer_list = filtered_customers['CLIENTE'].unique()
     customer_code = st.selectbox("Select Customer Code", customer_list)
-    if customer_code:
-        customer_data = df[df["CLIENTE"] == str(customer_code)]
-        customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
-        if not customer_data.empty and not customer_euros.empty:
-            st.write(f"### Analysis for Customer {customer_code}")
-            # Get percentage of units sold for each manufacturer
-            all_manufacturers = customer_data.iloc[:, 1:].T  # Exclude CLIENTE column
-            all_manufacturers.index = all_manufacturers.index.astype(str)
-            # Get total sales for each manufacturer
-            sales_data = customer_euros.iloc[:, 1:].T  # Exclude CLIENTE column
-            sales_data.index = sales_data.index.astype(str)
-            # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
-            sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
-            # Ensure all values are numeric
-            sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
-            # Sort manufacturers by percentage of units and get top 10
-            top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
-            # Sort manufacturers by total sales and get top 10
-            top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
-            # Combine top manufacturers from both lists and get up to 20 unique manufacturers
-            combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
-            # Filter out manufacturers that are not present in both datasets
-            combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
-            # Create a DataFrame with combined data for these top manufacturers
-            combined_data = pd.DataFrame({
-                'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
-                'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
-            }).fillna(0)
-            # Sort by units, then by sales
-            combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
-            # Filter out manufacturers with 0 units
-            non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
-            # If we have less than 3 non-zero manufacturers, add some zero-value ones
-            if len(non_zero_manufacturers) < 3:
-                zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
-                manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
-            else:
-                manufacturers_to_show = non_zero_manufacturers
-            values = manufacturers_to_show['units'].tolist()
-            amounts = manufacturers_to_show['sales'].tolist()
-            manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
-            st.write(f"### Results for top {len(manufacturers)} manufacturers:")
-            for manufacturer, value, amount in zip(manufacturers, values, amounts):
-                st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
-            if manufacturers:  # Only create the chart if we have data
-                fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
-                st.pyplot(fig)
-            else:
-                st.warning("No data available to create the radar chart.")
-            # Customer sales 2021-2024 (if data exists)
-            sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023', 'VENTA_2024']
-            if all(col in df.columns for col in sales_columns):
-                years = ['2021', '2022', '2023', '2024']
-                customer_sales = customer_data[sales_columns].values[0]
-                fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
-                fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
-                st.plotly_chart(fig_sales)
             else:
-                st.warning("Sales data for 2021-2024 not available.")
         else:
-            st.warning(f"No data found for customer {customer_code}. Please check the code.")
 # Customer Recommendations Page
 elif page == "Articles Recommendations":
@@ -209,9 +212,9 @@ elif page == "Articles Recommendations":
     else:
         filtered_customers = df
     customer_list = filtered_customers['CLIENTE'].unique()
-    customer_code = st.selectbox("Select Customer Code for Recommendations", customer_list)
-    # DEfinicion de la funcion recomienda
     def recomienda(new_basket):
         # Calcular la matriz TF-IDF
         tfidf = TfidfVectorizer()
@@ -224,7 +227,6 @@ elif page == "Articles Recommendations":
         # Comparar la nueva cesta con las anteriores
         similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
         # Obtener los índices de las cestas más similares
         similar_indices = similarities.argsort()[0][-3:]  # Las 3 más similares
@@ -237,13 +239,13 @@ elif page == "Articles Recommendations":
             sim_score = similarities[0][idx]
             total_similarity += sim_score
             products = cestas.iloc[idx]['Cestas'].split()
-        for product in products:
-            if product.strip() not in new_basket:  # Evitar recomendar lo que ya está en la cesta
-                if product.strip() in recommendations_count:
-                    recommendations_count[product.strip()] += sim_score
-                else:
-                    recommendations_count[product.strip()] = sim_score
         # Calcular la probabilidad relativa de cada producto recomendado
         recommendations_with_prob = []
@@ -259,22 +261,21 @@ elif page == "Articles Recommendations":
         # Agregar las recomendaciones al DataFrame usando pd.concat
         for product, prob in recommendations_with_prob:
-        # Buscar la descripción en el DataFrame de productos
             description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
             if not description.empty:
-            # Crear un nuevo DataFrame temporal para la recomendación
-                    temp_df = pd.DataFrame({
-                        'ARTICULO': [product],
-                        'DESCRIPCION': [description.values[0]],  # Obtener el primer valor encontrado
-                        'PROBABILIDAD': [prob]
-                     })
-            # Concatenar el DataFrame temporal al DataFrame de recomendaciones
-            recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)
-            return recommendations_df
     # Comprobar si el cliente está en el CSV de fieles
     is_fiel = customer_code in fieles_df['Cliente'].astype(str).values
     if customer_code:
@@ -284,14 +285,38 @@ elif page == "Articles Recommendations":
             if option == "By Purchase History":
                 st.warning("Option not available... aún")
-            elif option == "By Current Basket":
                 st.write("Enter the items in the basket:")
                 # Input para los artículos y unidades
                 items = st.text_input("Enter items (comma-separated):").split(',')
                 quantities = st.text_input("Enter quantities (comma-separated):").split(',')
                 # Crear una lista de artículos basada en la entrada
                 new_basket = [item.strip() for item in items]
@@ -306,27 +331,4 @@ elif page == "Articles Recommendations":
                     else:
                         st.warning("No recommendations found for the provided basket.")
                 else:
-                    st.warning("The number of items must match the number of quantities.")
-        else:
-            st.write(f"### Customer {customer_code} is not a loyal customer.")
-            st.write("Recommendation based on the basket. Please enter the items:")
-            # Input para los artículos y unidades
-            items = st.text_input("Enter items (comma-separated):").split(',')
-            quantities = st.text_input("Enter quantities (comma-separated):").split(',')
-            # Crear una lista de artículos basada en la entrada
-            new_basket = [item.strip() for item in items]
-            # Asegurarse de que las longitudes de artículos y cantidades coincidan
-            if len(new_basket) == len(quantities):
-                # Procesar la lista para recomendar
-                recommendations_df = recomienda(new_basket)
-                if not recommendations_df.empty:
-                    st.write("### Recommendations based on the current basket:")
-                    st.dataframe(recommendations_df)
-                else:
-                    st.warning("No recommendations found for the provided basket.")
-            else:
-                st.warning("The number of items must match the number of quantities.")

     customer_list = filtered_customers['CLIENTE'].unique()
     customer_code = st.selectbox("Select Customer Code", customer_list)
+    if st.button("Calcular"):
+        if customer_code:
+            customer_data = df[df["CLIENTE"] == str(customer_code)]
+            customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
+            if not customer_data.empty and not customer_euros.empty:
+                st.write(f"### Analysis for Customer {customer_code}")
+                # Get percentage of units sold for each manufacturer
+                all_manufacturers = customer_data.iloc[:, 1:].T  # Exclude CLIENTE column
+                all_manufacturers.index = all_manufacturers.index.astype(str)
+                # Get total sales for each manufacturer
+                sales_data = customer_euros.iloc[:, 1:].T  # Exclude CLIENTE column
+                sales_data.index = sales_data.index.astype(str)
+                # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
+                sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
+                # Ensure all values are numeric
+                sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
+                # Sort manufacturers by percentage of units and get top 10
+                top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
+                # Sort manufacturers by total sales and get top 10
+                top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
+                # Combine top manufacturers from both lists and get up to 20 unique manufacturers
+                combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
+                # Filter out manufacturers that are not present in both datasets
+                combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
+                # Create a DataFrame with combined data for these top manufacturers
+                combined_data = pd.DataFrame({
+                    'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
+                    'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
+                }).fillna(0)
+                # Sort by units, then by sales
+                combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
+                # Filter out manufacturers with 0 units
+                non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
+                # If we have less than 3 non-zero manufacturers, add some zero-value ones
+                if len(non_zero_manufacturers) < 3:
+                    zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
+                    manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
+                else:
+                    manufacturers_to_show = non_zero_manufacturers
+                values = manufacturers_to_show['units'].tolist()
+                amounts = manufacturers_to_show['sales'].tolist()
+                manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
+                st.write(f"### Results for top {len(manufacturers)} manufacturers:")
+                for manufacturer, value, amount in zip(manufacturers, values, amounts):
+                    st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
+                if manufacturers:  # Only create the chart if we have data
+                    fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
+                    st.pyplot(fig)
+                else:
+                    st.warning("No data available to create the radar chart.")
+                # Customer sales 2021-2024 (if data exists)
+                sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023', 'VENTA_2024']
+                if all(col in df.columns for col in sales_columns):
+                    years = ['2021', '2022', '2023', '2024']
+                    customer_sales = customer_data[sales_columns].values[0]
+                    fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
+                    fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
+                    st.plotly_chart(fig_sales)
+                else:
+                    st.warning("Sales data for 2021-2024 not available.")
             else:
+                st.warning(f"No data found for customer {customer_code}. Please check the code.")
         else:
+            st.warning("Please select a customer.")
 # Customer Recommendations Page
 elif page == "Articles Recommendations":
     else:
         filtered_customers = df
     customer_list = filtered_customers['CLIENTE'].unique()
+    customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
+    # Definición de la función recomienda
     def recomienda(new_basket):
         # Calcular la matriz TF-IDF
         tfidf = TfidfVectorizer()
         # Comparar la nueva cesta con las anteriores
         similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
         # Obtener los índices de las cestas más similares
         similar_indices = similarities.argsort()[0][-3:]  # Las 3 más similares
             sim_score = similarities[0][idx]
             total_similarity += sim_score
             products = cestas.iloc[idx]['Cestas'].split()
+            for product in products:
+                if product.strip() not in new_basket:  # Evitar recomendar lo que ya está en la cesta
+                    if product.strip() in recommendations_count:
+                        recommendations_count[product.strip()] += sim_score
+                    else:
+                        recommendations_count[product.strip()] = sim_score
         # Calcular la probabilidad relativa de cada producto recomendado
         recommendations_with_prob = []
         # Agregar las recomendaciones al DataFrame usando pd.concat
         for product, prob in recommendations_with_prob:
+            # Buscar la descripción en el DataFrame de productos
             description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
             if not description.empty:
+                # Crear un nuevo DataFrame temporal para la recomendación
+                temp_df = pd.DataFrame({
+                    'ARTICULO': [product],
+                    'DESCRIPCION': [description.values[0]],  # Obtener el primer valor encontrado
+                    'PROBABILIDAD': [prob]
+                })
+                # Concatenar el DataFrame temporal al DataFrame de recomendaciones
+                recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)
+        return recommendations_df
     # Comprobar si el cliente está en el CSV de fieles
     is_fiel = customer_code in fieles_df['Cliente'].astype(str).values
     if customer_code:
             if option == "By Purchase History":
                 st.warning("Option not available... aún")
+            elif option == "By Current Basket":
                 st.write("Enter the items in the basket:")
                 # Input para los artículos y unidades
                 items = st.text_input("Enter items (comma-separated):").split(',')
                 quantities = st.text_input("Enter quantities (comma-separated):").split(',')
+                if st.button("Calcular"):  # Añadimos el botón "Calcular"
+                    # Crear una lista de artículos basada en la entrada
+                    new_basket = [item.strip() for item in items]
+                    # Asegurarse de que las longitudes de artículos y cantidades coincidan
+                    if len(new_basket) == len(quantities):
+                        # Procesar la lista para recomendar
+                        recommendations_df = recomienda(new_basket)
+                        if not recommendations_df.empty:
+                            st.write("### Recommendations based on the current basket:")
+                            st.dataframe(recommendations_df)
+                        else:
+                            st.warning("No recommendations found for the provided basket.")
+                    else:
+                        st.warning("The number of items must match the number of quantities.")
+        else:
+            st.write(f"### Customer {customer_code} is not a loyal customer.")
+            st.write("Recommendation based on the basket. Please enter the items:")
+            # Input para los artículos y unidades
+            items = st.text_input("Enter items (comma-separated):").split(',')
+            quantities = st.text_input("Enter quantities (comma-separated):").split(',')
+            if st.button("Calcular"):  # Añadimos el botón "Calcular"
                 # Crear una lista de artículos basada en la entrada
                 new_basket = [item.strip() for item in items]
                     else:
                         st.warning("No recommendations found for the provided basket.")
                 else:
+                    st.warning("The number of items must match the number of quantities.")