Spaces:
Sleeping
Sleeping
GMARTINEZMILLA
committed on
Commit
•
4508fcb
1
Parent(s):
2cd23d8
bugfix: added import lgbm
Browse files
app.py
CHANGED
@@ -118,37 +118,30 @@ elif page == "Customer Analysis":
|
|
118 |
|
119 |
if st.button("Calcular"):
|
120 |
if customer_code:
|
121 |
-
|
122 |
-
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
st.write(f"
|
127 |
-
|
128 |
-
# Find Customer's Cluster
|
129 |
-
customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
|
130 |
-
|
131 |
-
if not customer_match.empty:
|
132 |
-
cluster = customer_match['cluster_id'].values[0]
|
133 |
-
st.write(f"Customer {customer_code} belongs to cluster {cluster}")
|
134 |
-
else:
|
135 |
-
st.error(f"Customer {customer_code} not found in customer_clusters.")
|
136 |
-
st.stop() # Stop further execution if no cluster is found
|
137 |
|
138 |
# Load the Corresponding Model
|
139 |
model_path = f'models/modelo_cluster_{cluster}.txt'
|
140 |
gbm = lgb.Booster(model_file=model_path)
|
141 |
st.write(f"Loaded model for cluster {cluster}")
|
142 |
|
143 |
-
# Load X_predict for that cluster
|
144 |
X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
|
145 |
|
146 |
-
# Filter for the specific customer
|
147 |
X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
|
148 |
|
149 |
if not X_cliente.empty:
|
|
|
|
|
|
|
150 |
# Make Prediction for the selected customer
|
151 |
-
y_pred = gbm.predict(
|
152 |
|
153 |
# Reassemble the results
|
154 |
results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
|
@@ -156,7 +149,7 @@ elif page == "Customer Analysis":
|
|
156 |
|
157 |
st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
|
158 |
|
159 |
-
#
|
160 |
df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
|
161 |
actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
|
162 |
|
@@ -166,17 +159,17 @@ elif page == "Customer Analysis":
|
|
166 |
how='left')
|
167 |
results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
|
168 |
|
169 |
-
# Calculate metrics
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
|
181 |
# Analysis of results
|
182 |
threshold_good = 100 # You may want to adjust this threshold
|
|
|
118 |
|
119 |
if st.button("Calcular"):
|
120 |
if customer_code:
|
121 |
+
# Find Customer's Cluster
|
122 |
+
customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
|
123 |
|
124 |
+
if not customer_match.empty:
|
125 |
+
cluster = customer_match['cluster_id'].values[0]
|
126 |
+
st.write(f"Customer {customer_code} belongs to cluster {cluster}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
# Load the Corresponding Model
|
129 |
model_path = f'models/modelo_cluster_{cluster}.txt'
|
130 |
gbm = lgb.Booster(model_file=model_path)
|
131 |
st.write(f"Loaded model for cluster {cluster}")
|
132 |
|
133 |
+
# Load X_predict for that cluster
|
134 |
X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
|
135 |
|
136 |
+
# Filter for the specific customer
|
137 |
X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
|
138 |
|
139 |
if not X_cliente.empty:
|
140 |
+
# Prepare data for prediction
|
141 |
+
features_for_prediction = X_cliente.drop(columns=['cliente_id', 'fecha_mes'])
|
142 |
+
|
143 |
# Make Prediction for the selected customer
|
144 |
+
y_pred = gbm.predict(features_for_prediction, num_iteration=gbm.best_iteration)
|
145 |
|
146 |
# Reassemble the results
|
147 |
results = X_cliente[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
|
|
|
149 |
|
150 |
st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
|
151 |
|
152 |
+
# Load actual data
|
153 |
df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
|
154 |
actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
|
155 |
|
|
|
159 |
how='left')
|
160 |
results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
|
161 |
|
162 |
+
# Calculate metrics only for non-null actual sales
|
163 |
+
valid_results = results.dropna(subset=['ventas_reales'])
|
164 |
+
if not valid_results.empty:
|
165 |
+
mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
|
166 |
+
mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
|
167 |
+
rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
|
168 |
+
|
169 |
+
st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
|
170 |
+
st.write(f"MAE: {mae:.2f}")
|
171 |
+
st.write(f"MAPE: {mape:.2f}%")
|
172 |
+
st.write(f"RMSE: {rmse:.2f}")
|
173 |
|
174 |
# Analysis of results
|
175 |
threshold_good = 100 # You may want to adjust this threshold
|