GMARTINEZMILLA committed on
Commit
4508fcb
1 Parent(s): 2cd23d8

bugfix: added import lgbm

Browse files
Files changed (1) hide show
  1. app.py +23 -30
app.py CHANGED
@@ -118,37 +118,30 @@ elif page == "Customer Analysis":
118
 
119
  if st.button("Calcular"):
120
  if customer_code:
121
- customer_data = df[df["CLIENTE"] == str(customer_code)]
122
- customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
123
 
124
- # Check if customer data exists
125
- if not customer_data.empty and not customer_euros.empty:
126
- st.write(f"### Analysis for Customer {customer_code}")
127
-
128
- # Find Customer's Cluster
129
- customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
130
-
131
- if not customer_match.empty:
132
- cluster = customer_match['cluster_id'].values[0]
133
- st.write(f"Customer {customer_code} belongs to cluster {cluster}")
134
- else:
135
- st.error(f"Customer {customer_code} not found in customer_clusters.")
136
- st.stop() # Stop further execution if no cluster is found
137
 
138
  # Load the Corresponding Model
139
  model_path = f'models/modelo_cluster_{cluster}.txt'
140
  gbm = lgb.Booster(model_file=model_path)
141
  st.write(f"Loaded model for cluster {cluster}")
142
 
143
- # Load X_predict for that cluster and extract customer-specific data
144
  X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
145
 
146
- # Filter for the specific customer and drop the 'cliente_id' column
147
  X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
148
 
149
  if not X_cliente.empty:
 
 
 
150
  # Make Prediction for the selected customer
151
- y_pred = gbm.predict(X_cliente.drop(columns=['cliente_id', 'fecha_mes']), num_iteration=gbm.best_iteration)
152
 
153
  # Reassemble the results
154
  results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
@@ -156,7 +149,7 @@ elif page == "Customer Analysis":
156
 
157
  st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
158
 
159
- # Merge with actual data from df_agg_2024
160
  df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
161
  actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
162
 
@@ -166,17 +159,17 @@ elif page == "Customer Analysis":
166
  how='left')
167
  results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
168
 
169
- # Calculate metrics
170
- mae = mean_absolute_error(results['ventas_reales'], results['ventas_predichas'])
171
- mape = np.mean(np.abs((results['ventas_reales'] - results['ventas_predichas']) / results['ventas_reales'])) * 100
172
- rmse = np.sqrt(mean_squared_error(results['ventas_reales'], results['ventas_predichas']))
173
- smape_value = smape(results['ventas_reales'], results['ventas_predichas'])
174
-
175
- st.write(f"Actual total sales for Customer {customer_code}: {results['ventas_reales'].sum():.2f}")
176
- st.write(f"MAE: {mae:.2f}")
177
- st.write(f"MAPE: {mape:.2f}%")
178
- st.write(f"RMSE: {rmse:.2f}")
179
- st.write(f"SMAPE: {smape_value:.2f}%")
180
 
181
  # Analysis of results
182
  threshold_good = 100 # You may want to adjust this threshold
 
118
 
119
  if st.button("Calcular"):
120
  if customer_code:
121
+ # Find Customer's Cluster
122
+ customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
123
 
124
+ if not customer_match.empty:
125
+ cluster = customer_match['cluster_id'].values[0]
126
+ st.write(f"Customer {customer_code} belongs to cluster {cluster}")
 
 
 
 
 
 
 
 
 
 
127
 
128
  # Load the Corresponding Model
129
  model_path = f'models/modelo_cluster_{cluster}.txt'
130
  gbm = lgb.Booster(model_file=model_path)
131
  st.write(f"Loaded model for cluster {cluster}")
132
 
133
+ # Load X_predict for that cluster
134
  X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
135
 
136
+ # Filter for the specific customer
137
  X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
138
 
139
  if not X_cliente.empty:
140
+ # Prepare data for prediction
141
+ features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
142
+
143
  # Make Prediction for the selected customer
144
+ y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
145
 
146
  # Reassemble the results
147
  results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
 
149
 
150
  st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
151
 
152
+ # Load actual data
153
  df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
154
  actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
155
 
 
159
  how='left')
160
  results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
161
 
162
+ # Calculate metrics only for non-null actual sales
163
+ valid_results = results.dropna(subset=['ventas_reales'])
164
+ if not valid_results.empty:
165
+ mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
166
+ mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
167
+ rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
168
+
169
+ st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
170
+ st.write(f"MAE: {mae:.2f}")
171
+ st.write(f"MAPE: {mape:.2f}%")
172
+ st.write(f"RMSE: {rmse:.2f}")
173
 
174
  # Analysis of results
175
  threshold_good = 100 # You may want to adjust this threshold