GMARTINEZMILLA committed on
Commit
ddf19f6
1 Parent(s): 4cdd823

bugfix: added import lgbm

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -125,7 +125,7 @@ elif page == "Customer Analysis":
125
  if not customer_data.empty and not customer_euros.empty:
126
  st.write(f"### Analysis for Customer {customer_code}")
127
 
128
- # **Find Customer's Cluster**
129
  customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
130
 
131
  if not customer_match.empty:
@@ -135,44 +135,44 @@ elif page == "Customer Analysis":
135
  st.error(f"Customer {customer_code} not found in customer_clusters.")
136
  st.stop() # Stop further execution if no cluster is found
137
 
138
- # **Step 2: Load the Corresponding Model**
139
  model_path = f'models/modelo_cluster_{cluster}.txt'
140
  gbm = lgb.Booster(model_file=model_path)
141
  st.write(f"Loaded model for cluster {cluster}")
142
 
143
- # **Step 3: Load X_predict for that cluster and extract customer-specific data**
144
  X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
145
  X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
146
 
147
  if not X_cliente.empty:
148
- # **Step 4: Make Prediction for the selected customer**
149
  y_pred = gbm.predict(X_cliente.drop(columns=['cliente_id']), num_iteration=gbm.best_iteration)
150
- st.write(f"Predicted sales for Customer {customer_code}: {y_pred[0]:.2f}")
151
 
152
- # **Step 5: Merge with actual data from df_agg_2024**
153
  df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
154
  actual_sales = df_agg_2024[(df_agg_2024['cliente_id'] == customer_code) & (df_agg_2024['marca_id_encoded'].isin(X_cliente['marca_id_encoded']))]
155
  if not actual_sales.empty:
156
- merged_data = pd.merge(
157
- pd.DataFrame({'cliente_id': [customer_code], 'ventas_predichas': y_pred}),
158
- actual_sales[['cliente_id', 'marca_id_encoded', 'precio_total']],
159
- on='cliente_id',
160
- how='left'
161
- )
162
- merged_data.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
163
-
164
- # Calculate metrics (MAE, MAPE, RMSE, SMAPE)
165
  mae = mean_absolute_error(merged_data['ventas_reales'], merged_data['ventas_predichas'])
166
  mape = np.mean(np.abs((merged_data['ventas_reales'] - merged_data['ventas_predichas']) / merged_data['ventas_reales'])) * 100
167
  rmse = np.sqrt(mean_squared_error(merged_data['ventas_reales'], merged_data['ventas_predichas']))
168
  smape_value = smape(merged_data['ventas_reales'], merged_data['ventas_predichas'])
169
 
 
170
  st.write(f"MAE: {mae:.2f}")
171
  st.write(f"MAPE: {mape:.2f}%")
172
  st.write(f"RMSE: {rmse:.2f}")
173
  st.write(f"SMAPE: {smape_value:.2f}%")
174
 
175
- # **Step 6: Analysis of results (show insights if the customer is performing well or not)**
 
176
  if mae < threshold_good:
177
  st.success(f"Customer {customer_code} is performing well based on the predictions.")
178
  else:
 
125
  if not customer_data.empty and not customer_euros.empty:
126
  st.write(f"### Analysis for Customer {customer_code}")
127
 
128
+ # Find Customer's Cluster
129
  customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
130
 
131
  if not customer_match.empty:
 
135
  st.error(f"Customer {customer_code} not found in customer_clusters.")
136
  st.stop() # Stop further execution if no cluster is found
137
 
138
+ # Load the Corresponding Model
139
  model_path = f'models/modelo_cluster_{cluster}.txt'
140
  gbm = lgb.Booster(model_file=model_path)
141
  st.write(f"Loaded model for cluster {cluster}")
142
 
143
+ # Load X_predict for that cluster and extract customer-specific data
144
  X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
145
  X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
146
 
147
  if not X_cliente.empty:
148
+ # Make Prediction for the selected customer
149
  y_pred = gbm.predict(X_cliente.drop(columns=['cliente_id']), num_iteration=gbm.best_iteration)
150
+ st.write(f"Predicted sales for Customer {customer_code}: {y_pred.sum():.2f}")
151
 
152
+ # Merge with actual data from df_agg_2024
153
  df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
154
  actual_sales = df_agg_2024[(df_agg_2024['cliente_id'] == customer_code) & (df_agg_2024['marca_id_encoded'].isin(X_cliente['marca_id_encoded']))]
155
  if not actual_sales.empty:
156
+ merged_data = pd.DataFrame({
157
+ 'cliente_id': [customer_code],
158
+ 'ventas_predichas': [y_pred.sum()],
159
+ 'ventas_reales': [actual_sales['precio_total'].sum()]
160
+ })
161
+
162
+ # Calculate metrics
 
 
163
  mae = mean_absolute_error(merged_data['ventas_reales'], merged_data['ventas_predichas'])
164
  mape = np.mean(np.abs((merged_data['ventas_reales'] - merged_data['ventas_predichas']) / merged_data['ventas_reales'])) * 100
165
  rmse = np.sqrt(mean_squared_error(merged_data['ventas_reales'], merged_data['ventas_predichas']))
166
  smape_value = smape(merged_data['ventas_reales'], merged_data['ventas_predichas'])
167
 
168
+ st.write(f"Actual sales for Customer {customer_code}: {merged_data['ventas_reales'].values[0]:.2f}")
169
  st.write(f"MAE: {mae:.2f}")
170
  st.write(f"MAPE: {mape:.2f}%")
171
  st.write(f"RMSE: {rmse:.2f}")
172
  st.write(f"SMAPE: {smape_value:.2f}%")
173
 
174
+ # Analysis of results
175
+ threshold_good = 100 # You may want to adjust this threshold
176
  if mae < threshold_good:
177
  st.success(f"Customer {customer_code} is performing well based on the predictions.")
178
  else: