Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
romanbredehoft-zama
committed on
Commit
•
993f2a6
1
Parent(s):
747c295
Replace outdated terminologies and remove some features
Browse files- app.py +1 -2
- backend.py +13 -19
- data/clean_data.csv +0 -0
- data/data.csv +0 -0
- deployment_files/client.zip +2 -2
- deployment_files/pre_processor_third_party.pkl +2 -2
- deployment_files/pre_processor_user.pkl +2 -2
- deployment_files/server.zip +2 -2
- development.py +2 -2
- settings.py +4 -5
- utils/pre_processing.py +4 -4
app.py
CHANGED
@@ -77,7 +77,6 @@ with demo:
|
|
77 |
with gr.Row():
|
78 |
with gr.Column():
|
79 |
gr.Markdown("### User")
|
80 |
-
gender = gr.Radio(["Female", "Male"], label="Gender", value="Female")
|
81 |
bool_inputs = gr.CheckboxGroup(["Car", "Property", "Work phone", "Phone", "Email"], label="What do you own ?")
|
82 |
num_children = gr.Slider(**CHILDREN_MIN_MAX, step=1, label="Number of children", info="How many children do you have ?")
|
83 |
household_size = gr.Slider(**FAMILY_MIN_MAX, step=1, label="Household size", info="How many members does your household have? ?")
|
@@ -185,7 +184,7 @@ with demo:
|
|
185 |
# side to the server
|
186 |
encrypt_button_user.click(
|
187 |
pre_process_encrypt_send_user,
|
188 |
-
inputs=[client_id,
|
189 |
income_type, education_type, family_status, occupation_type, housing_type],
|
190 |
outputs=[encrypted_input_user],
|
191 |
)
|
|
|
77 |
with gr.Row():
|
78 |
with gr.Column():
|
79 |
gr.Markdown("### User")
|
|
|
80 |
bool_inputs = gr.CheckboxGroup(["Car", "Property", "Work phone", "Phone", "Email"], label="What do you own ?")
|
81 |
num_children = gr.Slider(**CHILDREN_MIN_MAX, step=1, label="Number of children", info="How many children do you have ?")
|
82 |
household_size = gr.Slider(**FAMILY_MIN_MAX, step=1, label="Household size", info="How many members does your household have? ?")
|
|
|
184 |
# side to the server
|
185 |
encrypt_button_user.click(
|
186 |
pre_process_encrypt_send_user,
|
187 |
+
inputs=[client_id, bool_inputs, num_children, household_size, total_income, age, \
|
188 |
income_type, education_type, family_status, occupation_type, housing_type],
|
189 |
outputs=[encrypted_input_user],
|
190 |
)
|
backend.py
CHANGED
@@ -243,12 +243,8 @@ def pre_process_encrypt_send_user(client_id, *inputs):
|
|
243 |
(int, bytes): Integer ID representing the current client and a byte short representation of
|
244 |
the encrypted input to send.
|
245 |
"""
|
246 |
-
|
247 |
family_status, occupation_type, housing_type = inputs
|
248 |
-
|
249 |
-
# Encoding given in https://www.kaggle.com/code/samuelcortinhas/credit-cards-data-cleaning
|
250 |
-
# for "Gender" is M ('Male') -> 1 and F ('Female') -> 0
|
251 |
-
gender = gender == "Male"
|
252 |
|
253 |
# Retrieve boolean values
|
254 |
own_car = "Car" in bool_inputs
|
@@ -258,21 +254,20 @@ def pre_process_encrypt_send_user(client_id, *inputs):
|
|
258 |
email = "Email" in bool_inputs
|
259 |
|
260 |
user_inputs = pandas.DataFrame({
|
261 |
-
"Gender": [gender],
|
262 |
"Own_car": [own_car],
|
263 |
"Own_property": [own_property],
|
264 |
"Work_phone": [work_phone],
|
265 |
"Phone": [phone],
|
266 |
"Email": [email],
|
267 |
-
"Num_children": num_children,
|
268 |
-
"
|
269 |
-
"Total_income": total_income,
|
270 |
-
"Age": age,
|
271 |
-
"Income_type": income_type,
|
272 |
-
"Education_type": education_type,
|
273 |
-
"Family_status": family_status,
|
274 |
-
"Occupation_type": occupation_type,
|
275 |
-
"Housing_type": housing_type,
|
276 |
})
|
277 |
|
278 |
preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
|
@@ -308,12 +303,11 @@ def pre_process_encrypt_send_third_party(client_id, *inputs):
|
|
308 |
the encrypted input to send.
|
309 |
"""
|
310 |
salaried, years_salaried = inputs
|
311 |
-
|
312 |
-
|
313 |
-
unemployed = salaried == "No"
|
314 |
|
315 |
third_party_inputs = pandas.DataFrame({
|
316 |
-
"
|
317 |
"Years_employed": [years_salaried],
|
318 |
})
|
319 |
|
|
|
243 |
(int, bytes): Integer ID representing the current client and a byte short representation of
|
244 |
the encrypted input to send.
|
245 |
"""
|
246 |
+
bool_inputs, num_children, household_size, total_income, age, income_type, education_type, \
|
247 |
family_status, occupation_type, housing_type = inputs
|
|
|
|
|
|
|
|
|
248 |
|
249 |
# Retrieve boolean values
|
250 |
own_car = "Car" in bool_inputs
|
|
|
254 |
email = "Email" in bool_inputs
|
255 |
|
256 |
user_inputs = pandas.DataFrame({
|
|
|
257 |
"Own_car": [own_car],
|
258 |
"Own_property": [own_property],
|
259 |
"Work_phone": [work_phone],
|
260 |
"Phone": [phone],
|
261 |
"Email": [email],
|
262 |
+
"Num_children": [num_children],
|
263 |
+
"Household_size": [household_size],
|
264 |
+
"Total_income": [total_income],
|
265 |
+
"Age": [age],
|
266 |
+
"Income_type": [income_type],
|
267 |
+
"Education_type": [education_type],
|
268 |
+
"Family_status": [family_status],
|
269 |
+
"Occupation_type": [occupation_type],
|
270 |
+
"Housing_type": [housing_type],
|
271 |
})
|
272 |
|
273 |
preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
|
|
|
303 |
the encrypted input to send.
|
304 |
"""
|
305 |
salaried, years_salaried = inputs
|
306 |
+
|
307 |
+
is_salaried = salaried == "Yes"
|
|
|
308 |
|
309 |
third_party_inputs = pandas.DataFrame({
|
310 |
+
"Salaried": [is_salaried],
|
311 |
"Years_employed": [years_salaried],
|
312 |
})
|
313 |
|
data/clean_data.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
data/data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
deployment_files/client.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4c826efceb2e6c4d9fd1d3876d7adae10537814add6ae3f08b5dab9ae23f76b
|
3 |
+
size 76339
|
deployment_files/pre_processor_third_party.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:999ae2b4f9420d6b2e2987035cde0f43e93b2edb85ac32b3c877584b45871ca8
|
3 |
+
size 1588
|
deployment_files/pre_processor_user.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8454f6f929f89b5dec427d9f6e522a33cde0a49c8dc6f06a650bb0bf90b59913
|
3 |
+
size 6221
|
deployment_files/server.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adc3d696a290278148d2ac906018a3a58d3c545290f6fdb60a82a3f2e7eea531
|
3 |
+
size 3322
|
development.py
CHANGED
@@ -31,7 +31,7 @@ print("Load and pre-process the data")
|
|
31 |
# A few additional pre-processing steps have been applied to this data set as well:
|
32 |
# - "ID" column has been removed
|
33 |
# - "Total_income" values have been multiplied by 0.14 to make its median match France's annual
|
34 |
-
# salary one from 2023 (
|
35 |
data = pandas.read_csv(DATA_PATH, encoding="utf-8")
|
36 |
|
37 |
# Define input and target data
|
@@ -39,7 +39,7 @@ data_y = data.pop("Target").copy()
|
|
39 |
data_x = data.copy()
|
40 |
|
41 |
# Get data from all parties
|
42 |
-
data_third_party = select_and_pop_features(data_x, ["Years_employed", "
|
43 |
data_bank = select_and_pop_features(data_x, ["Account_length"])
|
44 |
data_user = data_x.copy()
|
45 |
|
|
|
31 |
# A few additional pre-processing steps have been applied to this data set as well:
|
32 |
# - "ID" column has been removed
|
33 |
# - "Total_income" values have been multiplied by 0.14 to make its median match France's annual
|
34 |
+
# salary one from 2023 (22050 euros)
|
35 |
data = pandas.read_csv(DATA_PATH, encoding="utf-8")
|
36 |
|
37 |
# Define input and target data
|
|
|
39 |
data_x = data.copy()
|
40 |
|
41 |
# Get data from all parties
|
42 |
+
data_third_party = select_and_pop_features(data_x, ["Years_employed", "Salaried"])
|
43 |
data_bank = select_and_pop_features(data_x, ["Account_length"])
|
44 |
data_user = data_x.copy()
|
45 |
|
settings.py
CHANGED
@@ -25,10 +25,9 @@ SERVER_FILES.mkdir(exist_ok=True)
|
|
25 |
SERVER_URL = "http://localhost:8000/"
|
26 |
|
27 |
# Path to data file
|
28 |
-
#
|
29 |
-
#
|
30 |
-
|
31 |
-
DATA_PATH = "data/clean_data.csv"
|
32 |
|
33 |
# Development settings
|
34 |
RANDOM_STATE = 0
|
@@ -61,7 +60,7 @@ CHILDREN_MIN_MAX = get_min_max(_data, "Num_children")
|
|
61 |
INCOME_MIN_MAX = get_min_max(_data, "Total_income")
|
62 |
AGE_MIN_MAX = get_min_max(_data, "Age")
|
63 |
SALARIED_MIN_MAX = get_min_max(_data, "Years_employed")
|
64 |
-
FAMILY_MIN_MAX = get_min_max(_data, "
|
65 |
|
66 |
# App data choices
|
67 |
INCOME_TYPES = list(_data["Income_type"].unique())
|
|
|
25 |
SERVER_URL = "http://localhost:8000/"
|
26 |
|
27 |
# Path to data file
|
28 |
+
# Details about pre-processing steps can be found in the 'development.py' and 'pre_processing.py'
|
29 |
+
# files
|
30 |
+
DATA_PATH = "data/data.csv"
|
|
|
31 |
|
32 |
# Development settings
|
33 |
RANDOM_STATE = 0
|
|
|
60 |
INCOME_MIN_MAX = get_min_max(_data, "Total_income")
|
61 |
AGE_MIN_MAX = get_min_max(_data, "Age")
|
62 |
SALARIED_MIN_MAX = get_min_max(_data, "Years_employed")
|
63 |
+
FAMILY_MIN_MAX = get_min_max(_data, "Household_size")
|
64 |
|
65 |
# App data choices
|
66 |
INCOME_TYPES = list(_data["Income_type"].unique())
|
utils/pre_processing.py
CHANGED
@@ -41,13 +41,13 @@ def get_pre_processors():
|
|
41 |
['Num_children']
|
42 |
),
|
43 |
(
|
44 |
-
"
|
45 |
_get_pipeline_replace_one_hot(_replace_values_geq, 3),
|
46 |
-
['
|
47 |
),
|
48 |
(
|
49 |
"replace_income_type",
|
50 |
-
_get_pipeline_replace_one_hot(_replace_values_eq, {"
|
51 |
['Income_type']
|
52 |
),
|
53 |
(
|
@@ -60,7 +60,7 @@ def get_pre_processors():
|
|
60 |
_get_pipeline_replace_one_hot(
|
61 |
_replace_values_eq,
|
62 |
{
|
63 |
-
"Labor_work": ["Cleaning staff", "Cooking staff", "Drivers", "Laborers", "Low-
|
64 |
"Office_work": ["Accountants", "Core staff", "HR staff", "Medicine staff", "Private service staff", "Realty agents", "Sales staff", "Secretaries"],
|
65 |
"High_tech_work": ["Managers", "High skill tech staff", "IT staff"],
|
66 |
},
|
|
|
41 |
['Num_children']
|
42 |
),
|
43 |
(
|
44 |
+
"replace_household_size",
|
45 |
_get_pipeline_replace_one_hot(_replace_values_geq, 3),
|
46 |
+
['Household_size']
|
47 |
),
|
48 |
(
|
49 |
"replace_income_type",
|
50 |
+
_get_pipeline_replace_one_hot(_replace_values_eq, {"Public Sector": ["Retired", "Student"]}),
|
51 |
['Income_type']
|
52 |
),
|
53 |
(
|
|
|
60 |
_get_pipeline_replace_one_hot(
|
61 |
_replace_values_eq,
|
62 |
{
|
63 |
+
"Labor_work": ["Cleaning staff", "Cooking staff", "Drivers", "Laborers", "Low-wage laborers", "Security staff", "Waiters/barmen staff"],
|
64 |
"Office_work": ["Accountants", "Core staff", "HR staff", "Medicine staff", "Private service staff", "Realty agents", "Sales staff", "Secretaries"],
|
65 |
"High_tech_work": ["Managers", "High skill tech staff", "IT staff"],
|
66 |
},
|