Spaces:

zama-fhe
/

encrypted_credit_scoring

Running on CPU Upgrade

romanbredehoft-zama committed on Nov 28, 2023

Commit

a241bb3

•

1 Parent(s): 993f2a6

Impose correct column order in pre-processing

Files changed (6) hide show

backend.py CHANGED Viewed

@@ -21,6 +21,8 @@ from settings import (
     PRE_PROCESSOR_USER_PATH,
     PRE_PROCESSOR_THIRD_PARTY_PATH,
     CLIENT_TYPES,
 )
 from utils.client_server_interface import MultiInputsFHEModelClient
@@ -270,6 +272,8 @@ def pre_process_encrypt_send_user(client_id, *inputs):
         "Housing_type": [housing_type],
     })
     preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
     return _encrypt_send(client_id, preprocessed_user_inputs, "user")
@@ -311,6 +315,8 @@ def pre_process_encrypt_send_third_party(client_id, *inputs):
         "Years_employed": [years_salaried],
     })
     preprocessed_third_party_inputs = PRE_PROCESSOR_THIRD_PARTY.transform(third_party_inputs)
     return _encrypt_send(client_id, preprocessed_third_party_inputs, "third_party")

     PRE_PROCESSOR_USER_PATH,
     PRE_PROCESSOR_THIRD_PARTY_PATH,
     CLIENT_TYPES,
+    USER_COLUMNS,
+    THIRD_PARTY_COLUMNS,
 )
 from utils.client_server_interface import MultiInputsFHEModelClient
         "Housing_type": [housing_type],
     })
+    user_inputs = user_inputs.reindex(USER_COLUMNS, axis=1)
     preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
     return _encrypt_send(client_id, preprocessed_user_inputs, "user")
         "Years_employed": [years_salaried],
     })
+    third_party_inputs = third_party_inputs.reindex(THIRD_PARTY_COLUMNS, axis=1)
     preprocessed_third_party_inputs = PRE_PROCESSOR_THIRD_PARTY.transform(third_party_inputs)
     return _encrypt_send(client_id, preprocessed_third_party_inputs, "third_party")

deployment_files/client.zip CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4c826efceb2e6c4d9fd1d3876d7adae10537814add6ae3f08b5dab9ae23f76b
-size 76339

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c0a655d225d0f31642c20c8f3e5537505b6b6904ad8af7636631024cf6e18b6
+size 76383

deployment_files/server.zip CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:adc3d696a290278148d2ac906018a3a58d3c545290f6fdb60a82a3f2e7eea531
-size 3322

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5570f7dfda2d5ced4a6bd411d9d2eba67b8bcbd523efac803be66abd4368a99
+size 3321

development.py CHANGED Viewed

@@ -9,10 +9,20 @@ from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 from imblearn.over_sampling import SMOTE
-from settings import DEPLOYMENT_PATH, RANDOM_STATE, DATA_PATH, INPUT_SLICES, PRE_PROCESSOR_USER_PATH, PRE_PROCESSOR_THIRD_PARTY_PATH
 from utils.client_server_interface import MultiInputsFHEModelDev
 from utils.model import MultiInputXGBClassifier
-from utils.pre_processing import get_pre_processors, select_and_pop_features
 def get_processed_multi_inputs(data):
@@ -39,9 +49,9 @@ data_y = data.pop("Target").copy()
 data_x = data.copy()
 # Get data from all parties
-data_third_party = select_and_pop_features(data_x, ["Years_employed", "Salaried"])
-data_bank = select_and_pop_features(data_x, ["Account_length"])
-data_user = data_x.copy()
 # Feature engineer the data
 pre_processor_user, pre_processor_third_party = get_pre_processors()

 from sklearn.metrics import accuracy_score
 from imblearn.over_sampling import SMOTE
+from settings import (
+    DEPLOYMENT_PATH,
+    RANDOM_STATE,
+    DATA_PATH,
+    INPUT_SLICES,
+    PRE_PROCESSOR_USER_PATH,
+    PRE_PROCESSOR_THIRD_PARTY_PATH,
+    USER_COLUMNS,
+    BANK_COLUMNS,
+    THIRD_PARTY_COLUMNS,
+)
 from utils.client_server_interface import MultiInputsFHEModelDev
 from utils.model import MultiInputXGBClassifier
+from utils.pre_processing import get_pre_processors
 def get_processed_multi_inputs(data):
 data_x = data.copy()
 # Get data from all parties
+data_user = data_x[USER_COLUMNS].copy()
+data_bank = data_x[BANK_COLUMNS].copy()
+data_third_party = data_x[THIRD_PARTY_COLUMNS].copy()
 # Feature engineer the data
 pre_processor_user, pre_processor_third_party = get_pre_processors()

settings.py CHANGED Viewed

@@ -29,7 +29,7 @@ SERVER_URL = "http://localhost:8000/"
 # files
 DATA_PATH = "data/data.csv"
-# Developement settings
 RANDOM_STATE = 0
 INITIAL_INPUT_SHAPE = (1, 49)
@@ -45,6 +45,14 @@ INPUT_SLICES = {
     "third_party": slice(43, 49),  # Third position: start from n_feature_user + n_feature_bank
 }
 _data = pandas.read_csv(DATA_PATH, encoding="utf-8")
 def get_min_max(data, column):

 # files
 DATA_PATH = "data/data.csv"
+# Development settings
 RANDOM_STATE = 0
 INITIAL_INPUT_SHAPE = (1, 49)
     "third_party": slice(43, 49),  # Third position: start from n_feature_user + n_feature_bank
 }
+USER_COLUMNS = [
+    'Own_car', 'Own_property', 'Work_phone', 'Phone', 'Email', 'Num_children', 'Household_size',
+    'Total_income', 'Age', 'Income_type', 'Education_type', 'Family_status', 'Housing_type',
+    'Occupation_type',
+]
+BANK_COLUMNS = ["Account_length"]
+THIRD_PARTY_COLUMNS = ["Years_employed", "Salaried"]
 _data = pandas.read_csv(DATA_PATH, encoding="utf-8")
 def get_min_max(data, column):

utils/pre_processing.py CHANGED Viewed

@@ -83,10 +83,4 @@ def get_pre_processors():
         verbose_feature_names_out=False,
     )
-    return pre_processor_user, pre_processor_third_party
-def select_and_pop_features(data, columns):
-    new_data = data[columns].copy()
-    data.drop(columns, axis=1, inplace=True)
-    return new_data

         verbose_feature_names_out=False,
     )
+    return pre_processor_user, pre_processor_third_party