Spaces:

GMARTINEZMILLA
/

Final_Project

Sleeping

App Files Files Community

GMARTINEZMILLA committed on Oct 15

Commit

4508fcb

•

1 Parent(s): 2cd23d8

bugfix: added import lgbm

Browse files

Files changed (1) hide show

app.py +23 -30

app.py CHANGED Viewed

@@ -118,37 +118,30 @@ elif page == "Customer Analysis":
     if st.button("Calcular"):
         if customer_code:
-            customer_data = df[df["CLIENTE"] == str(customer_code)]
-            customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
-            # Check if customer data exists
-            if not customer_data.empty and not customer_euros.empty:
-                st.write(f"### Analysis for Customer {customer_code}")
-                # Find Customer's Cluster
-                customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
-                if not customer_match.empty:
-                    cluster = customer_match['cluster_id'].values[0]
-                    st.write(f"Customer {customer_code} belongs to cluster {cluster}")
-                else:
-                    st.error(f"Customer {customer_code} not found in customer_clusters.")
-                    st.stop()  # Stop further execution if no cluster is found
                 # Load the Corresponding Model
                 model_path = f'models/modelo_cluster_{cluster}.txt'
                 gbm = lgb.Booster(model_file=model_path)
                 st.write(f"Loaded model for cluster {cluster}")
-                # Load X_predict for that cluster and extract customer-specific data
                 X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
-                # Filter for the specific customer and drop the 'cliente_id' column
                 X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
                 if not X_cliente.empty:
                     # Make Prediction for the selected customer
-                    y_pred = gbm.predict(X_cliente.drop(columns=['cliente_id', 'fecha_mes']), num_iteration=gbm.best_iteration)
                     # Reassemble the results
                     results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
@@ -156,7 +149,7 @@ elif page == "Customer Analysis":
                     st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
-                    # Merge with actual data from df_agg_2024
                     df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
                     actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
@@ -166,17 +159,17 @@ elif page == "Customer Analysis":
                                                 how='left')
                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
-                        # Calculate metrics
-                        mae = mean_absolute_error(results['ventas_reales'], results['ventas_predichas'])
-                        mape = np.mean(np.abs((results['ventas_reales'] - results['ventas_predichas']) / results['ventas_reales'])) * 100
-                        rmse = np.sqrt(mean_squared_error(results['ventas_reales'], results['ventas_predichas']))
-                        smape_value = smape(results['ventas_reales'], results['ventas_predichas'])
-                        st.write(f"Actual total sales for Customer {customer_code}: {results['ventas_reales'].sum():.2f}")
-                        st.write(f"MAE: {mae:.2f}")
-                        st.write(f"MAPE: {mape:.2f}%")
-                        st.write(f"RMSE: {rmse:.2f}")
-                        st.write(f"SMAPE: {smape_value:.2f}%")
                         # Analysis of results
                         threshold_good = 100  # You may want to adjust this threshold

     if st.button("Calcular"):
         if customer_code:
+            # Find Customer's Cluster
+            customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
+            if not customer_match.empty:
+                cluster = customer_match['cluster_id'].values[0]
+                st.write(f"Customer {customer_code} belongs to cluster {cluster}")
                 # Load the Corresponding Model
                 model_path = f'models/modelo_cluster_{cluster}.txt'
                 gbm = lgb.Booster(model_file=model_path)
                 st.write(f"Loaded model for cluster {cluster}")
+                # Load X_predict for that cluster
                 X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
+                # Filter for the specific customer
                 X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
                 if not X_cliente.empty:
+                    # Prepare data for prediction
+                    features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
                     # Make Prediction for the selected customer
+                    y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
                     # Reassemble the results
                     results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
                     st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
+                    # Load actual data
                     df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
                     actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
                                                 how='left')
                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
+                        # Calculate metrics only for non-null actual sales
+                        valid_results = results.dropna(subset=['ventas_reales'])
+                        if not valid_results.empty:
+                            mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
+                            mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
+                            rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
+                            st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
+                            st.write(f"MAE: {mae:.2f}")
+                            st.write(f"MAPE: {mape:.2f}%")
+                            st.write(f"RMSE: {rmse:.2f}")
                         # Analysis of results
                         threshold_good = 100  # You may want to adjust this threshold