Spaces:
Sleeping
Sleeping
GMARTINEZMILLA
committed on
Commit
•
f764ae8
1
Parent(s):
af4b90c
feat: generated files
Browse files
app.py
CHANGED
@@ -114,16 +114,22 @@ elif page == "Customer Analysis":
|
|
114 |
# Get percentage of units sold for each manufacturer
|
115 |
all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
|
116 |
all_manufacturers.index = all_manufacturers.index.astype(str)
|
117 |
-
|
118 |
# Get total sales for each manufacturer
|
119 |
sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
|
120 |
sales_data.index = sales_data.index.astype(str)
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
# Sort manufacturers by percentage of units and get top 10
|
123 |
top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
|
124 |
-
|
125 |
# Sort manufacturers by total sales and get top 10
|
126 |
-
top_sales =
|
127 |
|
128 |
# Combine top manufacturers from both lists
|
129 |
combined_top = pd.concat([top_units, top_sales]).index.unique()
|
@@ -133,10 +139,10 @@ elif page == "Customer Analysis":
|
|
133 |
amounts = [] # Will store total sales
|
134 |
|
135 |
for m in combined_top:
|
136 |
-
if m in all_manufacturers.index and m in
|
137 |
values.append(float(all_manufacturers.loc[m, all_manufacturers.columns[0]]))
|
138 |
manufacturers.append(get_supplier_name(m))
|
139 |
-
amounts.append(float(
|
140 |
|
141 |
st.write(f"### Results for top {len(manufacturers)} manufacturers (balanced by units % and total sales):")
|
142 |
for manufacturer, value, amount in zip(manufacturers, values, amounts):
|
|
|
114 |
# Get percentage of units sold for each manufacturer
|
115 |
all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
|
116 |
all_manufacturers.index = all_manufacturers.index.astype(str)
|
117 |
+
|
118 |
# Get total sales for each manufacturer
|
119 |
sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
|
120 |
sales_data.index = sales_data.index.astype(str)
|
121 |
|
122 |
+
# Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
|
123 |
+
sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
|
124 |
+
|
125 |
+
# Ensure all values are numeric (optional, but adds robustness)
|
126 |
+
sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
|
127 |
+
|
128 |
# Sort manufacturers by percentage of units and get top 10
|
129 |
top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
|
130 |
+
|
131 |
# Sort manufacturers by total sales and get top 10
|
132 |
+
top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
|
133 |
|
134 |
# Combine top manufacturers from both lists
|
135 |
combined_top = pd.concat([top_units, top_sales]).index.unique()
|
|
|
139 |
amounts = [] # Will store total sales
|
140 |
|
141 |
for m in combined_top:
|
142 |
+
if m in all_manufacturers.index and m in sales_data_filtered.index:
|
143 |
values.append(float(all_manufacturers.loc[m, all_manufacturers.columns[0]]))
|
144 |
manufacturers.append(get_supplier_name(m))
|
145 |
+
amounts.append(float(sales_data_filtered.loc[m, sales_data_filtered.columns[0]]))
|
146 |
|
147 |
st.write(f"### Results for top {len(manufacturers)} manufacturers (balanced by units % and total sales):")
|
148 |
for manufacturer, value, amount in zip(manufacturers, values, amounts):
|
test.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
# Simulate loading data
|
5 |
+
print("Loading data...")
|
6 |
+
df = pd.read_csv(r"D:\01A-TRABAJO\PYTHON\DATASCIENCE\EJERCICIOS\final_project_space\Final_Project\df_clean.csv")
|
7 |
+
nombres_proveedores = pd.read_csv(r"D:\01A-TRABAJO\PYTHON\DATASCIENCE\EJERCICIOS\final_project_space\Final_Project\nombres_proveedores.csv", sep=';')
|
8 |
+
euros_proveedor = pd.read_csv(r"D:\01A-TRABAJO\PYTHON\DATASCIENCE\EJERCICIOS\final_project_space\Final_Project\euros_proveedor.csv", sep=',')
|
9 |
+
|
10 |
+
print("\nInitial data types:")
|
11 |
+
print(df.dtypes)
|
12 |
+
print(nombres_proveedores.dtypes)
|
13 |
+
print(euros_proveedor.dtypes)
|
14 |
+
|
15 |
+
# Convert columns to string
|
16 |
+
df['CLIENTE'] = df['CLIENTE'].astype(str)
|
17 |
+
nombres_proveedores['codigo'] = nombres_proveedores['codigo'].astype(str)
|
18 |
+
euros_proveedor['CLIENTE'] = euros_proveedor['CLIENTE'].astype(str)
|
19 |
+
|
20 |
+
print("\nData types after conversion:")
|
21 |
+
print(df.dtypes)
|
22 |
+
print(nombres_proveedores.dtypes)
|
23 |
+
print(euros_proveedor.dtypes)
|
24 |
+
|
25 |
+
# Convert numerical columns in euros_proveedor
|
26 |
+
for col in euros_proveedor.columns:
|
27 |
+
if col != 'CLIENTE':
|
28 |
+
euros_proveedor[col] = pd.to_numeric(euros_proveedor[col], errors='coerce')
|
29 |
+
|
30 |
+
print("\nData types in euros_proveedor after numeric conversion:")
|
31 |
+
print(euros_proveedor.dtypes)
|
32 |
+
|
33 |
+
# Simulate customer selection
|
34 |
+
customer_code = df['CLIENTE'].iloc[0] # Take the first customer as an example
|
35 |
+
print(f"\nAnalysis for customer: {customer_code}")
|
36 |
+
|
37 |
+
customer_data = df[df["CLIENTE"] == str(customer_code)]
|
38 |
+
customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
|
39 |
+
|
40 |
+
print("\nCustomer data:")
|
41 |
+
print(customer_data)
|
42 |
+
print("\nCustomer euro data:")
|
43 |
+
print(customer_euros)
|
44 |
+
|
45 |
+
# Obtain percentage of units sold by manufacturer
|
46 |
+
all_manufacturers = customer_data.iloc[:, 1:].T
|
47 |
+
all_manufacturers.index = all_manufacturers.index.astype(str)
|
48 |
+
|
49 |
+
print("\nAll manufacturers:")
|
50 |
+
print(all_manufacturers)
|
51 |
+
print(all_manufacturers.dtypes)
|
52 |
+
|
53 |
+
# Get total sales by manufacturer
|
54 |
+
sales_data = customer_euros.iloc[:, 1:].T
|
55 |
+
sales_data.index = sales_data.index.astype(str)
|
56 |
+
|
57 |
+
print("\nSales data:")
|
58 |
+
print(sales_data)
|
59 |
+
print(sales_data.dtypes)
|
60 |
+
|
61 |
+
# Remove the 'CLIENTE' row before attempting to sort sales data
|
62 |
+
sales_data_filtered = sales_data.drop(index='CLIENTE')
|
63 |
+
|
64 |
+
# Ensure all values are numeric
|
65 |
+
sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
|
66 |
+
|
67 |
+
# Attempt to sort the sales data after filtering
|
68 |
+
try:
|
69 |
+
print("\nAttempting to sort the sales data...")
|
70 |
+
top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
|
71 |
+
print("Sorting successful:")
|
72 |
+
print(top_sales)
|
73 |
+
except Exception as e:
|
74 |
+
print(f"Error sorting: {str(e)}")
|
75 |
+
print("Values in the first column:")
|
76 |
+
print(sales_data_filtered[sales_data_filtered.columns[0]])
|
77 |
+
print("Data types in the first column:")
|
78 |
+
print(sales_data_filtered[sales_data_filtered.columns[0]].apply(type))
|
79 |
+
|
80 |
+
print("\nDebugging script completed.")
|