GMARTINEZMILLA committed on
Commit
f764ae8
1 Parent(s): af4b90c

feat: generated files

Browse files
Files changed (2) hide show
  1. app.py +11 -5
  2. test.py +80 -0
app.py CHANGED
@@ -114,16 +114,22 @@ elif page == "Customer Analysis":
114
  # Get percentage of units sold for each manufacturer
115
  all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
116
  all_manufacturers.index = all_manufacturers.index.astype(str)
117
-
118
  # Get total sales for each manufacturer
119
  sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
120
  sales_data.index = sales_data.index.astype(str)
121
 
 
 
 
 
 
 
122
  # Sort manufacturers by percentage of units and get top 10
123
  top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
124
-
125
  # Sort manufacturers by total sales and get top 10
126
- top_sales = sales_data.sort_values(by=sales_data.columns[0], ascending=False).head(10)
127
 
128
  # Combine top manufacturers from both lists
129
  combined_top = pd.concat([top_units, top_sales]).index.unique()
@@ -133,10 +139,10 @@ elif page == "Customer Analysis":
133
  amounts = [] # Will store total sales
134
 
135
  for m in combined_top:
136
- if m in all_manufacturers.index and m in sales_data.index:
137
  values.append(float(all_manufacturers.loc[m, all_manufacturers.columns[0]]))
138
  manufacturers.append(get_supplier_name(m))
139
- amounts.append(float(sales_data.loc[m, sales_data.columns[0]]))
140
 
141
  st.write(f"### Results for top {len(manufacturers)} manufacturers (balanced by units % and total sales):")
142
  for manufacturer, value, amount in zip(manufacturers, values, amounts):
 
114
  # Get percentage of units sold for each manufacturer
115
  all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
116
  all_manufacturers.index = all_manufacturers.index.astype(str)
117
+
118
  # Get total sales for each manufacturer
119
  sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
120
  sales_data.index = sales_data.index.astype(str)
121
 
122
+ # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
123
+ sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
124
+
125
+ # Ensure all values are numeric (optional, but adds robustness)
126
+ sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
127
+
128
  # Sort manufacturers by percentage of units and get top 10
129
  top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
130
+
131
  # Sort manufacturers by total sales and get top 10
132
+ top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
133
 
134
  # Combine top manufacturers from both lists
135
  combined_top = pd.concat([top_units, top_sales]).index.unique()
 
139
  amounts = [] # Will store total sales
140
 
141
  for m in combined_top:
142
+ if m in all_manufacturers.index and m in sales_data_filtered.index:
143
  values.append(float(all_manufacturers.loc[m, all_manufacturers.columns[0]]))
144
  manufacturers.append(get_supplier_name(m))
145
+ amounts.append(float(sales_data_filtered.loc[m, sales_data_filtered.columns[0]]))
146
 
147
  st.write(f"### Results for top {len(manufacturers)} manufacturers (balanced by units % and total sales):")
148
  for manufacturer, value, amount in zip(manufacturers, values, amounts):
test.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
+ # Simulate loading data
5
+ print("Loading data...")
6
+ df = pd.read_csv(r"D:\01A-TRABAJO\PYTHON\DATASCIENCE\EJERCICIOS\final_project_space\Final_Project\df_clean.csv")
7
+ nombres_proveedores = pd.read_csv(r"D:\01A-TRABAJO\PYTHON\DATASCIENCE\EJERCICIOS\final_project_space\Final_Project\nombres_proveedores.csv", sep=';')
8
+ euros_proveedor = pd.read_csv(r"D:\01A-TRABAJO\PYTHON\DATASCIENCE\EJERCICIOS\final_project_space\Final_Project\euros_proveedor.csv", sep=',')
9
+
10
+ print("\nInitial data types:")
11
+ print(df.dtypes)
12
+ print(nombres_proveedores.dtypes)
13
+ print(euros_proveedor.dtypes)
14
+
15
+ # Convert columns to string
16
+ df['CLIENTE'] = df['CLIENTE'].astype(str)
17
+ nombres_proveedores['codigo'] = nombres_proveedores['codigo'].astype(str)
18
+ euros_proveedor['CLIENTE'] = euros_proveedor['CLIENTE'].astype(str)
19
+
20
+ print("\nData types after conversion:")
21
+ print(df.dtypes)
22
+ print(nombres_proveedores.dtypes)
23
+ print(euros_proveedor.dtypes)
24
+
25
+ # Convert numerical columns in euros_proveedor
26
+ for col in euros_proveedor.columns:
27
+ if col != 'CLIENTE':
28
+ euros_proveedor[col] = pd.to_numeric(euros_proveedor[col], errors='coerce')
29
+
30
+ print("\nData types in euros_proveedor after numeric conversion:")
31
+ print(euros_proveedor.dtypes)
32
+
33
+ # Simulate customer selection
34
+ customer_code = df['CLIENTE'].iloc[0] # Take the first customer as an example
35
+ print(f"\nAnalysis for customer: {customer_code}")
36
+
37
+ customer_data = df[df["CLIENTE"] == str(customer_code)]
38
+ customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
39
+
40
+ print("\nCustomer data:")
41
+ print(customer_data)
42
+ print("\nCustomer euro data:")
43
+ print(customer_euros)
44
+
45
+ # Obtain percentage of units sold by manufacturer
46
+ all_manufacturers = customer_data.iloc[:, 1:].T
47
+ all_manufacturers.index = all_manufacturers.index.astype(str)
48
+
49
+ print("\nAll manufacturers:")
50
+ print(all_manufacturers)
51
+ print(all_manufacturers.dtypes)
52
+
53
+ # Get total sales by manufacturer
54
+ sales_data = customer_euros.iloc[:, 1:].T
55
+ sales_data.index = sales_data.index.astype(str)
56
+
57
+ print("\nSales data:")
58
+ print(sales_data)
59
+ print(sales_data.dtypes)
60
+
61
+ # Remove the 'CLIENTE' row before attempting to sort sales data
62
+ sales_data_filtered = sales_data.drop(index='CLIENTE')
63
+
64
+ # Ensure all values are numeric
65
+ sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
66
+
67
+ # Attempt to sort the sales data after filtering
68
+ try:
69
+ print("\nAttempting to sort the sales data...")
70
+ top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
71
+ print("Sorting successful:")
72
+ print(top_sales)
73
+ except Exception as e:
74
+ print(f"Error sorting: {str(e)}")
75
+ print("Values in the first column:")
76
+ print(sales_data_filtered[sales_data_filtered.columns[0]])
77
+ print("Data types in the first column:")
78
+ print(sales_data_filtered[sales_data_filtered.columns[0]].apply(type))
79
+
80
+ print("\nDebugging script completed.")