Spaces:

GMARTINEZMILLA
/

Final_Project

Sleeping

App Files Community

GMARTINEZMILLA committed on Oct 15

Commit

ddf19f6

•

1 Parent(s): 4cdd823

bugfix: added import lgbm

Browse files

Files changed (1) hide show

app.py +16 -16

app.py CHANGED Viewed

@@ -125,7 +125,7 @@ elif page == "Customer Analysis":
             if not customer_data.empty and not customer_euros.empty:
                 st.write(f"### Analysis for Customer {customer_code}")
-                # **Find Customer's Cluster**
                 customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
                 if not customer_match.empty:
@@ -135,44 +135,44 @@ elif page == "Customer Analysis":
                     st.error(f"Customer {customer_code} not found in customer_clusters.")
                     st.stop()  # Stop further execution if no cluster is found
-                # **Step 2: Load the Corresponding Model**
                 model_path = f'models/modelo_cluster_{cluster}.txt'
                 gbm = lgb.Booster(model_file=model_path)
                 st.write(f"Loaded model for cluster {cluster}")
-                # **Step 3: Load X_predict for that cluster and extract customer-specific data**
                 X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
                 X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
                 if not X_cliente.empty:
-                    # **Step 4: Make Prediction for the selected customer**
                     y_pred = gbm.predict(X_cliente.drop(columns=['cliente_id']), num_iteration=gbm.best_iteration)
-                    st.write(f"Predicted sales for Customer {customer_code}: {y_pred[0]:.2f}")
-                    # **Step 5: Merge with actual data from df_agg_2024**
                     df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
                     actual_sales = df_agg_2024[(df_agg_2024['cliente_id'] == customer_code) & (df_agg_2024['marca_id_encoded'].isin(X_cliente['marca_id_encoded']))]
                     if not actual_sales.empty:
-                        merged_data = pd.merge(
-                            pd.DataFrame({'cliente_id': [customer_code], 'ventas_predichas': y_pred}),
-                            actual_sales[['cliente_id', 'marca_id_encoded', 'precio_total']],
-                            on='cliente_id',
-                            how='left'
-                        )
-                        merged_data.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
-                        # Calculate metrics (MAE, MAPE, RMSE, SMAPE)
                         mae = mean_absolute_error(merged_data['ventas_reales'], merged_data['ventas_predichas'])
                         mape = np.mean(np.abs((merged_data['ventas_reales'] - merged_data['ventas_predichas']) / merged_data['ventas_reales'])) * 100
                         rmse = np.sqrt(mean_squared_error(merged_data['ventas_reales'], merged_data['ventas_predichas']))
                         smape_value = smape(merged_data['ventas_reales'], merged_data['ventas_predichas'])
                         st.write(f"MAE: {mae:.2f}")
                         st.write(f"MAPE: {mape:.2f}%")
                         st.write(f"RMSE: {rmse:.2f}")
                         st.write(f"SMAPE: {smape_value:.2f}%")
-                        # **Step 6: Analysis of results (show insights if the customer is performing well or not)**
                         if mae < threshold_good:
                             st.success(f"Customer {customer_code} is performing well based on the predictions.")
                         else:

             if not customer_data.empty and not customer_euros.empty:
                 st.write(f"### Analysis for Customer {customer_code}")
+                # Find Customer's Cluster
                 customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
                 if not customer_match.empty:
                     st.error(f"Customer {customer_code} not found in customer_clusters.")
                     st.stop()  # Stop further execution if no cluster is found
+                # Load the Corresponding Model
                 model_path = f'models/modelo_cluster_{cluster}.txt'
                 gbm = lgb.Booster(model_file=model_path)
                 st.write(f"Loaded model for cluster {cluster}")
+                # Load X_predict for that cluster and extract customer-specific data
                 X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
                 X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
                 if not X_cliente.empty:
+                    # Make Prediction for the selected customer
                     y_pred = gbm.predict(X_cliente.drop(columns=['cliente_id']), num_iteration=gbm.best_iteration)
+                    st.write(f"Predicted sales for Customer {customer_code}: {y_pred.sum():.2f}")
+                    # Merge with actual data from df_agg_2024
                     df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
                     actual_sales = df_agg_2024[(df_agg_2024['cliente_id'] == customer_code) & (df_agg_2024['marca_id_encoded'].isin(X_cliente['marca_id_encoded']))]
                     if not actual_sales.empty:
+                        merged_data = pd.DataFrame({
+                            'cliente_id': [customer_code],
+                            'ventas_predichas': [y_pred.sum()],
+                            'ventas_reales': [actual_sales['precio_total'].sum()]
+                        })
+                        # Calculate metrics
                         mae = mean_absolute_error(merged_data['ventas_reales'], merged_data['ventas_predichas'])
                         mape = np.mean(np.abs((merged_data['ventas_reales'] - merged_data['ventas_predichas']) / merged_data['ventas_reales'])) * 100
                         rmse = np.sqrt(mean_squared_error(merged_data['ventas_reales'], merged_data['ventas_predichas']))
                         smape_value = smape(merged_data['ventas_reales'], merged_data['ventas_predichas'])
+                        st.write(f"Actual sales for Customer {customer_code}: {merged_data['ventas_reales'].values[0]:.2f}")
                         st.write(f"MAE: {mae:.2f}")
                         st.write(f"MAPE: {mape:.2f}%")
                         st.write(f"RMSE: {rmse:.2f}")
                         st.write(f"SMAPE: {smape_value:.2f}%")
+                        # Analysis of results
+                        threshold_good = 100  # You may want to adjust this threshold
                         if mae < threshold_good:
                             st.success(f"Customer {customer_code} is performing well based on the predictions.")
                         else: