Spaces:

GMARTINEZMILLA
/

Final_Project

Sleeping

App Files Community

GMARTINEZMILLA committed on Oct 15

Commit

9f5e05c

•

1 Parent(s): 4508fcb

bugfix: added import lgbm

Browse files

Files changed (1) hide show

app.py +168 -0

app.py CHANGED Viewed

@@ -104,6 +104,143 @@ if page == "":
     st.write("Use the dropdown menu to navigate between the different sections.")
 # Customer Analysis Page
 elif page == "Customer Analysis":
     st.title("Customer Analysis")
     st.markdown("Use the tools below to explore your customer data.")
@@ -130,34 +267,64 @@ elif page == "Customer Analysis":
                 gbm = lgb.Booster(model_file=model_path)
                 st.write(f"Loaded model for cluster {cluster}")
                 # Load X_predict for that cluster
                 X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
                 # Filter for the specific customer
                 X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
                 if not X_cliente.empty:
                     # Prepare data for prediction
                     features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
                     # Make Prediction for the selected customer
                     y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
                     # Reassemble the results
                     results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
                     results['ventas_predichas'] = y_pred
                     st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
                     # Load actual data
                     df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
                     actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
                     if not actual_sales.empty:
                         results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
                                                 on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
                                                 how='left')
                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
                         # Calculate metrics only for non-null actual sales
                         valid_results = results.dropna(subset=['ventas_reales'])
@@ -241,6 +408,7 @@ elif page == "Customer Analysis":
         else:
             st.warning("Please select a customer.")
 # Articles Recommendations Page
 elif page == "Articles Recommendations":
     st.title("Articles Recommendations")

     st.write("Use the dropdown menu to navigate between the different sections.")
 # Customer Analysis Page
+# elif page == "Customer Analysis":
+#     st.title("Customer Analysis")
+#     st.markdown("Use the tools below to explore your customer data.")
+#     partial_code = st.text_input("Enter part of Customer Code (or leave empty to see all)")
+#     if partial_code:
+#         filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
+#     else:
+#         filtered_customers = df
+#     customer_list = filtered_customers['CLIENTE'].unique()
+#     customer_code = st.selectbox("Select Customer Code", customer_list)
+#     if st.button("Calcular"):
+#         if customer_code:
+#             # Find Customer's Cluster
+#             customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
+#             if not customer_match.empty:
+#                 cluster = customer_match['cluster_id'].values[0]
+#                 st.write(f"Customer {customer_code} belongs to cluster {cluster}")
+#                 # Load the Corresponding Model
+#                 model_path = f'models/modelo_cluster_{cluster}.txt'
+#                 gbm = lgb.Booster(model_file=model_path)
+#                 st.write(f"Loaded model for cluster {cluster}")
+#                 # Load X_predict for that cluster
+#                 X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
+#                 # Filter for the specific customer
+#                 X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
+#                 if not X_cliente.empty:
+#                     # Prepare data for prediction
+#                     features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
+#                     # Make Prediction for the selected customer
+#                     y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
+#                     # Reassemble the results
+#                     results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
+#                     results['ventas_predichas'] = y_pred
+#                     st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
+#                     # Load actual data
+#                     df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
+#                     actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
+#                     if not actual_sales.empty:
+#                         results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
+#                                                 on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
+#                                                 how='left')
+#                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
+#                         # Calculate metrics only for non-null actual sales
+#                         valid_results = results.dropna(subset=['ventas_reales'])
+#                         if not valid_results.empty:
+#                             mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
+#                             mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
+#                             rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
+#                             st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
+#                             st.write(f"MAE: {mae:.2f}")
+#                             st.write(f"MAPE: {mape:.2f}%")
+#                             st.write(f"RMSE: {rmse:.2f}")
+#                         # Analysis of results
+#                         threshold_good = 100  # You may want to adjust this threshold
+#                         if mae < threshold_good:
+#                             st.success(f"Customer {customer_code} is performing well based on the predictions.")
+#                         else:
+#                             st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
+#                     else:
+#                         st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
+#                     # Show the radar chart
+#                     all_manufacturers = customer_data.iloc[:, 1:].T  # Exclude CLIENTE column
+#                     all_manufacturers.index = all_manufacturers.index.astype(str)
+#                     sales_data = customer_euros.iloc[:, 1:].T  # Exclude CLIENTE column
+#                     sales_data.index = sales_data.index.astype(str)
+#                     sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
+#                     sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
+#                     top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
+#                     top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
+#                     combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
+#                     combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
+#                     combined_data = pd.DataFrame({
+#                         'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
+#                         'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
+#                     }).fillna(0)
+#                     combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
+#                     non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
+#                     if len(non_zero_manufacturers) < 3:
+#                         zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
+#                         manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
+#                     else:
+#                         manufacturers_to_show = non_zero_manufacturers
+#                     values = manufacturers_to_show['units'].tolist()
+#                     amounts = manufacturers_to_show['sales'].tolist()
+#                     manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
+#                     st.write(f"### Results for top {len(manufacturers)} manufacturers:")
+#                     for manufacturer, value, amount in zip(manufacturers, values, amounts):
+#                         st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
+#                     if manufacturers:
+#                         fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
+#                         st.pyplot(fig)
+#                     else:
+#                         st.warning("No data available to create the radar chart.")
+#                     # Show sales over the years graph
+#                     sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
+#                     if all(col in ventas_clientes.columns for col in sales_columns):
+#                         years = ['2021', '2022', '2023']
+#                         customer_sales = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code][sales_columns].values[0]
+#                         fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
+#                         fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
+#                         st.plotly_chart(fig_sales)
+#                     else:
+#                         st.warning("Sales data for 2021-2023 not available.")
+#                 else:
+#                     st.warning(f"No prediction data found for customer {customer_code}.")
+#             else:
+#                 st.warning(f"No data found for customer {customer_code}. Please check the code.")
+#         else:
+#             st.warning("Please select a customer.")
 elif page == "Customer Analysis":
     st.title("Customer Analysis")
     st.markdown("Use the tools below to explore your customer data.")
                 gbm = lgb.Booster(model_file=model_path)
                 st.write(f"Loaded model for cluster {cluster}")
+                # Inspect the model
+                st.write("### Model Information:")
+                st.write(f"Number of trees: {gbm.num_trees()}")
+                st.write(f"Number of features: {gbm.num_feature()}")
+                st.write("Feature names:")
+                st.write(gbm.feature_name())
                 # Load X_predict for that cluster
                 X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
+                st.write("### X_predict_cluster DataFrame:")
+                st.write(X_predict_cluster.head())
+                st.write(f"Shape: {X_predict_cluster.shape}")
                 # Filter for the specific customer
                 X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
+                st.write("### X_cliente DataFrame:")
+                st.write(X_cliente.head())
+                st.write(f"Shape: {X_cliente.shape}")
                 if not X_cliente.empty:
                     # Prepare data for prediction
                     features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
+                    st.write("### Features for Prediction:")
+                    st.write(features_for_prediction.head())
+                    st.write(f"Shape: {features_for_prediction.shape}")
                     # Make Prediction for the selected customer
                     y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
+                    st.write("### Prediction Results:")
+                    st.write(f"Type of y_pred: {type(y_pred)}")
+                    st.write(f"Shape of y_pred: {y_pred.shape}")
+                    st.write("First few predictions:")
+                    st.write(y_pred[:5])
                     # Reassemble the results
                     results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
                     results['ventas_predichas'] = y_pred
+                    st.write("### Results DataFrame:")
+                    st.write(results.head())
+                    st.write(f"Shape: {results.shape}")
                     st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
                     # Load actual data
                     df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
                     actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
+                    st.write("### Actual Sales DataFrame:")
+                    st.write(actual_sales.head())
+                    st.write(f"Shape: {actual_sales.shape}")
                     if not actual_sales.empty:
                         results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
                                                 on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
                                                 how='left')
                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
+                        st.write("### Final Results DataFrame:")
+                        st.write(results.head())
+                        st.write(f"Shape: {results.shape}")
                         # Calculate metrics only for non-null actual sales
                         valid_results = results.dropna(subset=['ventas_reales'])
         else:
             st.warning("Please select a customer.")
 # Articles Recommendations Page
 elif page == "Articles Recommendations":
     st.title("Articles Recommendations")