Spaces:

zama-fhe
/

encrypted_credit_scoring

Running on CPU Upgrade

App Files Files Community

romanbredehoft-zama committed on Dec 11, 2023

Commit

74c0c8e

•

1 Parent(s): b0303a0

Add second model for optional explainability step

Browse files

Files changed (13) hide show

app.py +69 -40
backend.py +171 -36
deployment_files/{client.zip → approval_model/client.zip} +2 -2
deployment_files/{server.zip → approval_model/server.zip} +2 -2
deployment_files/{versions.json → approval_model/versions.json} +0 -0
deployment_files/explain_model/client.zip +3 -0
deployment_files/explain_model/server.zip +3 -0
deployment_files/explain_model/versions.json +1 -0
development.py +78 -17
server.py +2 -2
settings.py +16 -4
utils/client_server_interface.py +30 -11
utils/model.py +5 -43

app.py CHANGED Viewed

@@ -26,6 +26,7 @@ from backend import (
     run_fhe,
     get_output,
     decrypt_output,
 )
@@ -60,6 +61,12 @@ with demo:
     )
     client_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
     gr.Markdown("## Step 2: Fill in some information.")
     gr.Markdown(
         """
@@ -125,6 +132,31 @@ with demo:
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
     gr.Markdown("# Server side")
     gr.Markdown(
         """
@@ -142,6 +174,9 @@ with demo:
         label="Total FHE execution time (in seconds):", max_lines=1, interactive=False
     )
     gr.Markdown("# Client side")
     gr.Markdown(
         """
@@ -161,6 +196,13 @@ with demo:
         label="Encrypted output representation: ", max_lines=2, interactive=False
     )
     gr.Markdown("## Step 6: Decrypt the output.")
     gr.Markdown(
         """
@@ -173,52 +215,39 @@ with demo:
         label="Prediction", max_lines=1, interactive=False
     )
-    # Button generate the keys
-    keygen_button.click(
-        keygen_send,
-        outputs=[client_id, evaluation_key, keygen_button],
-    )
-    # Button to pre-process, generate the key, encrypt and send the user inputs from the client
-    # side to the server
-    encrypt_button_user.click(
-        pre_process_encrypt_send_user,
-        inputs=[client_id, bool_inputs, num_children, household_size, total_income, age, \
-                income_type, education_type, family_status, occupation_type, housing_type],
-        outputs=[encrypted_input_user],
     )
-    # Button to pre-process, generate the key, encrypt and send the bank inputs from the client
-    # side to the server
-    encrypt_button_bank.click(
-        pre_process_encrypt_send_bank,
-        inputs=[client_id, account_age],
-        outputs=[encrypted_input_bank],
     )
-    # Button to pre-process, generate the key, encrypt and send the third party inputs from the
-    # client side to the server
-    encrypt_button_third_party.click(
-        pre_process_encrypt_send_third_party,
-        inputs=[client_id, employed, years_employed],
-        outputs=[encrypted_input_third_party],
     )
-    # Button to send the encodings to the server using post method
-    execute_fhe_button.click(run_fhe, inputs=[client_id], outputs=[fhe_execution_time])
-    # Button to send the encodings to the server using post method
-    get_output_button.click(
-        get_output,
-        inputs=[client_id],
-        outputs=[encrypted_output_representation],
     )
-    # Button to decrypt the output
-    decrypt_button.click(
-        decrypt_output,
-        inputs=[client_id],
-        outputs=[prediction_output],
     )
     gr.Markdown(

     run_fhe,
     get_output,
     decrypt_output,
+    years_employed_encrypt_run_decrypt,
 )
     )
     client_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
+    # Button to generate the keys
+    keygen_button.click(
+        keygen_send,
+        outputs=[client_id, evaluation_key, keygen_button],
+    )
     gr.Markdown("## Step 2: Fill in some information.")
     gr.Markdown(
         """
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
+    # Button to pre-process, generate the key, encrypt and send the user inputs from the client
+    # side to the server
+    encrypt_button_user.click(
+        pre_process_encrypt_send_user,
+        inputs=[client_id, bool_inputs, num_children, household_size, total_income, age, \
+                income_type, education_type, family_status, occupation_type, housing_type],
+        outputs=[encrypted_input_user],
+    )
+    # Button to pre-process, generate the key, encrypt and send the bank inputs from the client
+    # side to the server
+    encrypt_button_bank.click(
+        pre_process_encrypt_send_bank,
+        inputs=[client_id, account_age],
+        outputs=[encrypted_input_bank],
+    )
+    # Button to pre-process, generate the key, encrypt and send the third party inputs from the
+    # client side to the server
+    encrypt_button_third_party.click(
+        pre_process_encrypt_send_third_party,
+        inputs=[client_id, employed, years_employed],
+        outputs=[encrypted_input_third_party],
+    )
     gr.Markdown("# Server side")
     gr.Markdown(
         """
         label="Total FHE execution time (in seconds):", max_lines=1, interactive=False
     )
+    # Button to send the encodings to the server using post method
+    execute_fhe_button.click(run_fhe, inputs=[client_id], outputs=[fhe_execution_time])
     gr.Markdown("# Client side")
     gr.Markdown(
         """
         label="Encrypted output representation: ", max_lines=2, interactive=False
     )
+    # Button to retrieve the encrypted output from the server
+    get_output_button.click(
+        get_output,
+        inputs=[client_id],
+        outputs=[encrypted_output_representation],
+    )
     gr.Markdown("## Step 6: Decrypt the output.")
     gr.Markdown(
         """
         label="Prediction", max_lines=1, interactive=False
     )
+    # Button to decrypt the output
+    decrypt_button.click(
+        decrypt_output,
+        inputs=[client_id],
+        outputs=[prediction_output],
     )
+    gr.Markdown("## Step 7 (optional): Explain the prediction.")
+    gr.Markdown(
+        """
+        In case the credit card is likely to be denied, the user can run a second model in order to
+        Explain the prediction better. More specifically, this new model indicates the number of
+        additional years of employment that could be required in order to increase the chance of
+        credit card approval.
+        All of the above steps are combined into a single button for simplicity. The following
+        button therefore encrypts the same inputs (except the years of employment) from all three
+        parties, runs the new prediction in FHE and decrypts the output.
+        """
     )
+    years_employed_prediction_button = gr.Button(
+        "Encrypt the inputs, compute in FHE and decrypt the output."
     )
+    years_employed_prediction = gr.Textbox(
+        label="Additional years of employed required.", max_lines=1, interactive=False
     )
+    # Button to explain the prediction
+    years_employed_prediction_button.click(
+        years_employed_encrypt_run_decrypt,
+        inputs=[client_id, prediction_output, bool_inputs, num_children, household_size, \
+                total_income, age, income_type, education_type, family_status, occupation_type, \
+                housing_type, account_age, employed, years_employed],
+        outputs=[years_employed_prediction],
     )
     gr.Markdown(

backend.py CHANGED Viewed

@@ -14,20 +14,26 @@ from settings import (
     FHE_KEYS,
     CLIENT_FILES,
     SERVER_FILES,
-    DEPLOYMENT_PATH,
-    PROCESSED_INPUT_SHAPE,
     INPUT_INDEXES,
-    INPUT_SLICES,
     PRE_PROCESSOR_USER_PATH,
     PRE_PROCESSOR_BANK_PATH,
     PRE_PROCESSOR_THIRD_PARTY_PATH,
     CLIENT_TYPES,
     USER_COLUMNS,
     BANK_COLUMNS,
-    THIRD_PARTY_COLUMNS,
 )
-from utils.client_server_interface import MultiInputsFHEModelClient
 # Load pre-processor instances
 with (
@@ -87,18 +93,22 @@ def clean_temporary_files(n_keys=20):
                 shutil.rmtree(directory)
-def _get_client(client_id):
     """Get the client instance.
     Args:
         client_id (int): The client ID to consider.
     Returns:
         FHEModelClient: The client instance.
     """
-    key_dir = FHE_KEYS / f"{client_id}"
-    return MultiInputsFHEModelClient(DEPLOYMENT_PATH, key_dir=key_dir, nb_inputs=len(CLIENT_TYPES))
 def _get_client_file_path(name, client_id, client_type=None):
@@ -196,7 +206,7 @@ def keygen_send():
     return client_id, evaluation_key_short, gr.update(value="Keys are generated and evaluation key is sent ✅")
-def _encrypt_send(client_id, inputs, client_type):
     """Encrypt the given inputs for a specific client and send it to the server.
     Args:
@@ -205,8 +215,7 @@ def _encrypt_send(client_id, inputs, client_type):
         client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
     Returns:
-        client_id, encrypted_inputs_short (int, bytes): Integer ID representing the current client
-            and a byte short representation of the encrypted input to send.
     """
     if client_id == "":
         raise gr.Error("Please generate the keys first.")
@@ -218,8 +227,8 @@ def _encrypt_send(client_id, inputs, client_type):
     encrypted_inputs = client.quantize_encrypt_serialize_multi_inputs(
         inputs,
         input_index=INPUT_INDEXES[client_type],
-        processed_input_shape=PROCESSED_INPUT_SHAPE,
-        input_slice=INPUT_SLICES[client_type],
     )
     file_name = "encrypted_inputs"
@@ -239,16 +248,14 @@ def _encrypt_send(client_id, inputs, client_type):
     return encrypted_inputs_short
-def pre_process_encrypt_send_user(client_id, *inputs):
-    """Pre-process, encrypt and send the user inputs for a specific client to the server.
     Args:
-        client_id (str): The current client ID to consider.
         *inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
     Returns:
-        (int, bytes): Integer ID representing the current client and a byte short representation of
-            the encrypted input to send.
     """
     bool_inputs, num_children, household_size, total_income, age, income_type, education_type, \
         family_status, occupation_type, housing_type = inputs
@@ -277,19 +284,32 @@ def pre_process_encrypt_send_user(client_id, *inputs):
     preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
     return _encrypt_send(client_id, preprocessed_user_inputs, "user")
-def pre_process_encrypt_send_bank(client_id, *inputs):
-    """Pre-process, encrypt and send the bank inputs for a specific client to the server.
     Args:
-        client_id (str): The current client ID to consider.
         *inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
     Returns:
-        (int, bytes): Integer ID representing the current client and a byte short representation of
-            the encrypted input to send.
     """
     account_age = inputs[0]
@@ -301,32 +321,65 @@ def pre_process_encrypt_send_bank(client_id, *inputs):
     preprocessed_bank_inputs = PRE_PROCESSOR_BANK.transform(bank_inputs)
     return _encrypt_send(client_id, preprocessed_bank_inputs, "bank")
-def pre_process_encrypt_send_third_party(client_id, *inputs):
-    """Pre-process, encrypt and send the third party inputs for a specific client to the server.
     Args:
-        client_id (str): The current client ID to consider.
         *inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
     Returns:
-        (int, bytes): Integer ID representing the current client and a byte short representation of
-            the encrypted input to send.
     """
-    employed, years_employed = inputs
     is_employed = employed == "Yes"
-    third_party_inputs = pandas.DataFrame({
-        "Employed": [is_employed],
-        "Years_employed": [years_employed],
-    })
-    third_party_inputs = third_party_inputs.reindex(THIRD_PARTY_COLUMNS, axis=1)
-    preprocessed_third_party_inputs = PRE_PROCESSOR_THIRD_PARTY.transform(third_party_inputs)
     return _encrypt_send(client_id, preprocessed_third_party_inputs, "third_party")
@@ -430,4 +483,86 @@ def decrypt_output(client_id):
     # Determine the predicted class
     output = numpy.argmax(output_proba, axis=1).squeeze()
-    return "Credit card is likely to be approved ✅" if output == 1 else "Credit card is likely to be denied ❌"

     FHE_KEYS,
     CLIENT_FILES,
     SERVER_FILES,
+    APPROVAL_DEPLOYMENT_PATH,
+    EXPLAIN_DEPLOYMENT_PATH,
+    APPROVAL_PROCESSED_INPUT_SHAPE,
+    EXPLAIN_PROCESSED_INPUT_SHAPE,
     INPUT_INDEXES,
+    APPROVAL_INPUT_SLICES,
+    EXPLAIN_INPUT_SLICES,
     PRE_PROCESSOR_USER_PATH,
     PRE_PROCESSOR_BANK_PATH,
     PRE_PROCESSOR_THIRD_PARTY_PATH,
     CLIENT_TYPES,
     USER_COLUMNS,
     BANK_COLUMNS,
+    APPROVAL_THIRD_PARTY_COLUMNS,
 )
+from utils.client_server_interface import MultiInputsFHEModelClient, MultiInputsFHEModelServer
+# Load the server used for explaining the prediction
+EXPLAIN_FHE_SERVER = MultiInputsFHEModelServer(EXPLAIN_DEPLOYMENT_PATH)
 # Load pre-processor instances
 with (
                 shutil.rmtree(directory)
+def _get_client(client_id, is_approval=True):
     """Get the client instance.
+    Each (client, model) pair uses its own key directory under FHE_KEYS, named
+    '<client_id>_approval' or '<client_id>_explain'.
     Args:
         client_id (str): The client ID to consider.
+        is_approval (bool): If client is representing the 'approval' model (else, it is
+            representing the 'explain' model). Defaults to True.
     Returns:
         MultiInputsFHEModelClient: The client instance.
     """
+    # The suffix keeps the keys of the two models in separate directories for a same client
+    key_suffix = "approval" if is_approval else "explain"
+    key_dir = FHE_KEYS / f"{client_id}_{key_suffix}"
+    # Each model has its own deployment directory
+    client_dir = APPROVAL_DEPLOYMENT_PATH if is_approval else EXPLAIN_DEPLOYMENT_PATH
+    # One input per client type (see CLIENT_TYPES)
+    return MultiInputsFHEModelClient(client_dir, key_dir=key_dir, nb_inputs=len(CLIENT_TYPES))
 def _get_client_file_path(name, client_id, client_type=None):
     return client_id, evaluation_key_short, gr.update(value="Keys are generated and evaluation key is sent ✅")
+def _encrypt_send(client_id, inputs, client_type, app_mode=True):
     """Encrypt the given inputs for a specific client and send it to the server.
     Args:
         client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
     Returns:
+        encrypted_inputs_short (str): A short representation of the encrypted input to send in hex.
     """
     if client_id == "":
         raise gr.Error("Please generate the keys first.")
     encrypted_inputs = client.quantize_encrypt_serialize_multi_inputs(
         inputs,
         input_index=INPUT_INDEXES[client_type],
+        processed_input_shape=APPROVAL_PROCESSED_INPUT_SHAPE,
+        input_slice=APPROVAL_INPUT_SLICES[client_type],
     )
     file_name = "encrypted_inputs"
     return encrypted_inputs_short
+def _pre_process_user(*inputs):
+    """Pre-process the user inputs.
     Args:
         *inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
     Returns:
+        (numpy.ndarray): The pre-processed inputs.
     """
     bool_inputs, num_children, household_size, total_income, age, income_type, education_type, \
         family_status, occupation_type, housing_type = inputs
     preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
+    return preprocessed_user_inputs
+def pre_process_encrypt_send_user(client_id, *inputs):
+    """Run the user pre-processing, then encrypt the result and send it to the server.
+    Args:
+        client_id (str): The current client ID to consider.
+        *inputs (Tuple[numpy.ndarray]): The raw user inputs, forwarded to '_pre_process_user'.
+    Returns:
+        (str): A short representation of the encrypted input to send in hex.
+    """
+    return _encrypt_send(client_id, _pre_process_user(*inputs), "user")
+def _pre_process_bank(*inputs):
+    """Pre-process the bank inputs.
     Args:
         *inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
     Returns:
+        (numpy.ndarray): The pre-processed inputs.
     """
     account_age = inputs[0]
     preprocessed_bank_inputs = PRE_PROCESSOR_BANK.transform(bank_inputs)
+    return preprocessed_bank_inputs
+def pre_process_encrypt_send_bank(client_id, *inputs):
+    """Run the bank pre-processing, then encrypt the result and send it to the server.
+    Args:
+        client_id (str): The current client ID to consider.
+        *inputs (Tuple[numpy.ndarray]): The raw bank inputs, forwarded to '_pre_process_bank'.
+    Returns:
+        (str): A short representation of the encrypted input to send in hex.
+    """
+    return _encrypt_send(client_id, _pre_process_bank(*inputs), "bank")
+def _pre_process_third_party(*inputs):
+    """Pre-process the third party inputs.
+    Two call shapes are supported: a single input (the employment status only), in which case
+    the raw boolean value is returned without applying the third party pre-processor, or two
+    inputs (the employment status and the years of employment), in which case the third party
+    pre-processor is applied.
     Args:
         *inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
     Returns:
         (numpy.ndarray): The pre-processed inputs.
     """
+    third_party_data = {}
+    # A single input is the employment status only. This is how
+    # 'years_employed_encrypt_run_decrypt' calls this function for the 'explain' model
+    if len(inputs) == 1:
+        employed = inputs[0]
+    else:
+        employed, years_employed = inputs
+        third_party_data["Years_employed"] = [years_employed]
+    # The employment status is compared to "Yes" in order to build a boolean feature
     is_employed = employed == "Yes"
+    third_party_data["Employed"] = [is_employed]
+    third_party_inputs = pandas.DataFrame(third_party_data)
+    if len(inputs) == 1:
+        # No pre-processor is applied here: the single boolean column is used as is
+        preprocessed_third_party_inputs = third_party_inputs.to_numpy()
+    else:
+        # Order the columns following APPROVAL_THIRD_PARTY_COLUMNS before transforming them
+        # NOTE(review): presumably the column order the pre-processor was fitted with - confirm
+        third_party_inputs = third_party_inputs.reindex(APPROVAL_THIRD_PARTY_COLUMNS, axis=1)
+        preprocessed_third_party_inputs = PRE_PROCESSOR_THIRD_PARTY.transform(third_party_inputs)
+    return preprocessed_third_party_inputs
+def pre_process_encrypt_send_third_party(client_id, *inputs):
+    """Run the third party pre-processing, then encrypt the result and send it to the server.
+    Args:
+        client_id (str): The current client ID to consider.
+        *inputs (Tuple[numpy.ndarray]): The raw third party inputs, forwarded to
+            '_pre_process_third_party'.
+    Returns:
+        (str): A short representation of the encrypted input to send in hex.
+    """
+    return _encrypt_send(client_id, _pre_process_third_party(*inputs), "third_party")
     # Determine the predicted class
     output = numpy.argmax(output_proba, axis=1).squeeze()
+    return "Credit card is likely to be approved ✅" if output == 1 else "Credit card is likely to be denied ❌"
+def years_employed_encrypt_run_decrypt(client_id, prediction_output, *inputs):
+    """Pre-process and encrypt the inputs, run the 'explain' model in FHE and decrypt the output.
+    Args:
+        client_id (str): The current client ID to consider.
+        prediction_output (str): The initial prediction output. This parameter is only used to
+            throw an error in case no prediction is available yet or the prediction was positive.
+        *inputs (Tuple[numpy.ndarray]): The raw inputs from all three parties, in the following
+            order: the ten user inputs, the bank's account age, the third party's employment
+            status and the third party's years of employment.
+    Returns:
+        (str): A message indicating the number of additional years of employment that could be
+            required in order to increase the chance of credit card approval.
+    Raises:
+        gr.Error: If the keys were not generated, if no prediction is available yet or if the
+            credit card is likely to be approved.
+    """
+    # Same guard as in '_encrypt_send': with an empty client ID, the key directory built by
+    # '_get_client' would be the same '_explain' folder for every session
+    if client_id == "":
+        raise gr.Error("Please generate the keys first.")
+    # An empty prediction means that there is nothing to explain yet
+    if not prediction_output:
+        raise gr.Error("Please run the prediction and decrypt the output first.")
+    if "approved" in prediction_output:
+        raise gr.Error(
+            "Explaining the prediction can only be done if the credit card is likely to be denied."
+        )
+    # Retrieve the client instance of the 'explain' model
+    client = _get_client(client_id, is_approval=False)
+    # Generate the private and evaluation keys (existing keys are re-used, as force is False)
+    client.generate_private_and_evaluation_keys(force=False)
+    # Retrieve the serialized evaluation key
+    evaluation_key = client.get_serialized_evaluation_keys()
+    bool_inputs, num_children, household_size, total_income, age, income_type, education_type, \
+        family_status, occupation_type, housing_type, account_age, employed, years_employed = inputs
+    # The years of employment are what the 'explain' model predicts, so they are not part of its
+    # inputs: the third party only provides the employment status here
+    preprocessed_inputs = [
+        _pre_process_user(
+            bool_inputs, num_children, household_size, total_income, age, income_type,
+            education_type, family_status, occupation_type, housing_type,
+        ),
+        _pre_process_bank(account_age),
+        _pre_process_third_party(employed),
+    ]
+    # Quantize, encrypt and serialize the inputs of each party
+    encrypted_inputs = [
+        client.quantize_encrypt_serialize_multi_inputs(
+            preprocessed_input,
+            input_index=INPUT_INDEXES[client_type],
+            processed_input_shape=EXPLAIN_PROCESSED_INPUT_SHAPE,
+            input_slice=EXPLAIN_INPUT_SLICES[client_type],
+        )
+        for client_type, preprocessed_input in zip(CLIENT_TYPES, preprocessed_inputs)
+    ]
+    # Run the FHE computation
+    encrypted_output = EXPLAIN_FHE_SERVER.run(
+        *encrypted_inputs,
+        serialized_evaluation_keys=evaluation_key
+    )
+    # Decrypt the output
+    output_prediction = client.deserialize_decrypt_dequantize(encrypted_output)
+    # Get the difference with the initial 'years of employment' input, rounded up to full years
+    years_employed_diff = int(numpy.ceil(output_prediction.squeeze() - years_employed))
+    if years_employed_diff > 0:
+        return (
+            f"Having at least {years_employed_diff} more years of employment would increase "
+            "your chance of having your credit card approved."
+        )
+    return (
+        "The number of years of employment you provided seems to be enough. The negative prediction "
+        "might come from other inputs."
+    )

deployment_files/{client.zip → approval_model/client.zip} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bad4947dfc472f67c4ac52c5a26077177b8993ee8b1541ae3fb7c473d94d7fb
-size 28647

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2ceb4a6e07cd13471c8c8c963d9e4de52d5af624e81775ebeb2421e29b9ba8c
+size 28667

deployment_files/{server.zip → approval_model/server.zip} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b1e87acc2acda1565b6b23ea82be8d6c6cc4b3747106502f73ebc62397cceaa
-size 1731

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e724012427c90fdc8df14360942909e5fa0accc8b27584880baab2a91533e78
+size 1729

deployment_files/{versions.json → approval_model/versions.json} RENAMED Viewed

File without changes

deployment_files/explain_model/client.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:506276661b4612d664d59f0d90aac1b5c09f942a850ec189aa16204d54433b27
+size 27714

deployment_files/explain_model/server.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:596ae66c7effd9733a8780088984d4fc08479d67c11586ee5787111329cb353f
+size 2035

deployment_files/explain_model/versions.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"concrete-python": "2.5.0rc1", "concrete-ml": "1.3.0", "python": "3.10.11"}

development.py CHANGED Viewed

@@ -6,28 +6,49 @@ import pandas
 import pickle
 from settings import (
-    DEPLOYMENT_PATH,
     DATA_PATH,
-    INPUT_SLICES,
     PRE_PROCESSOR_USER_PATH,
     PRE_PROCESSOR_BANK_PATH,
     PRE_PROCESSOR_THIRD_PARTY_PATH,
     USER_COLUMNS,
     BANK_COLUMNS,
-    THIRD_PARTY_COLUMNS,
 )
 from utils.client_server_interface import MultiInputsFHEModelDev
-from utils.model import MultiInputDecisionTreeClassifier
 from utils.pre_processing import get_pre_processors
-def get_processed_multi_inputs(data):
     return (
-        data[:, INPUT_SLICES["user"]],
-        data[:, INPUT_SLICES["bank"]],
-        data[:, INPUT_SLICES["third_party"]]
     )
 print("Load and pre-process the data")
 # Load the data
@@ -40,7 +61,7 @@ data_y = data_x.pop("Target").copy().to_frame()
 # Get data from all parties
 data_user = data_x[USER_COLUMNS].copy()
 data_bank = data_x[BANK_COLUMNS].copy()
-data_third_party = data_x[THIRD_PARTY_COLUMNS].copy()
 # Feature engineer the data
 pre_processor_user, pre_processor_bank, pre_processor_third_party = get_pre_processors()
@@ -54,23 +75,23 @@ preprocessed_data_x = numpy.concatenate((preprocessed_data_user, preprocessed_da
 print("\nTrain and compile the model")
-model = MultiInputDecisionTreeClassifier()
-model, sklearn_model = model.fit_benchmark(preprocessed_data_x, data_y)
-multi_inputs_train = get_processed_multi_inputs(preprocessed_data_x)
-model.compile(*multi_inputs_train, inputs_encryption_status=["encrypted", "encrypted", "encrypted"])
 print("\nSave deployment files")
 # Delete the deployment folder and its content if it already exists
-if DEPLOYMENT_PATH.is_dir():
-    shutil.rmtree(DEPLOYMENT_PATH)
 # Save files needed for deployment (and enable cross-platform deployment)
-fhe_dev = MultiInputsFHEModelDev(DEPLOYMENT_PATH, model)
-fhe_dev.save(via_mlir=True)
 # Save pre-processors
 with (
@@ -82,4 +103,44 @@ with (
     pickle.dump(pre_processor_bank, file_bank)
     pickle.dump(pre_processor_third_party, file_third_party)
 print("\nDone !")

 import pickle
 from settings import (
+    APPROVAL_DEPLOYMENT_PATH,
+    EXPLAIN_DEPLOYMENT_PATH,
     DATA_PATH,
+    APPROVAL_INPUT_SLICES,
+    EXPLAIN_INPUT_SLICES,
     PRE_PROCESSOR_USER_PATH,
     PRE_PROCESSOR_BANK_PATH,
     PRE_PROCESSOR_THIRD_PARTY_PATH,
     USER_COLUMNS,
     BANK_COLUMNS,
+    APPROVAL_THIRD_PARTY_COLUMNS,
+    EXPLAIN_THIRD_PARTY_COLUMNS,
 )
 from utils.client_server_interface import MultiInputsFHEModelDev
+from utils.model import MultiInputDecisionTreeClassifier, MultiInputDecisionTreeRegressor
 from utils.pre_processing import get_pre_processors
+def get_multi_inputs(data, is_approval):
+    """Split the input data into one input per party, using fixed column slices.
+    Args:
+        data (numpy.ndarray): The input data to consider.
+        is_approval (bool): If the data should be used for the 'approval' model (else, for the
+            'explain' model).
+    Returns:
+        (Tuple[numpy.ndarray]): The inputs for all three parties, in the following order: user,
+            bank and third party.
+    """
+    # Both models share the same party names, only the column slices differ
+    party_slices = APPROVAL_INPUT_SLICES if is_approval else EXPLAIN_INPUT_SLICES
+    return tuple(data[:, party_slices[party]] for party in ("user", "bank", "third_party"))
 print("Load and pre-process the data")
 # Load the data
 # Get data from all parties
 data_user = data_x[USER_COLUMNS].copy()
 data_bank = data_x[BANK_COLUMNS].copy()
+data_third_party = data_x[APPROVAL_THIRD_PARTY_COLUMNS].copy()
 # Feature engineer the data
 pre_processor_user, pre_processor_bank, pre_processor_third_party = get_pre_processors()
 print("\nTrain and compile the model")
+model_approval = MultiInputDecisionTreeClassifier()
+model_approval, sklearn_model_approval = model_approval.fit_benchmark(preprocessed_data_x, data_y)
+multi_inputs_train = get_multi_inputs(preprocessed_data_x, is_approval=True)
+model_approval.compile(*multi_inputs_train, inputs_encryption_status=["encrypted", "encrypted", "encrypted"])
 print("\nSave deployment files")
 # Delete the deployment folder and its content if it already exists
+if APPROVAL_DEPLOYMENT_PATH.is_dir():
+    shutil.rmtree(APPROVAL_DEPLOYMENT_PATH)
 # Save files needed for deployment (and enable cross-platform deployment)
+fhe_model_dev_approval = MultiInputsFHEModelDev(APPROVAL_DEPLOYMENT_PATH, model_approval)
+fhe_model_dev_approval.save(via_mlir=True)
 # Save pre-processors
 with (
     pickle.dump(pre_processor_bank, file_bank)
     pickle.dump(pre_processor_third_party, file_third_party)
+print("\nLoad, train, compile and save files for the 'explain' model")
+# Define input and target data
+# The 'explain' model is a regressor whose target is the 'Years_employed' column. The 'Target'
+# column (used as target by the 'approval' model) is removed from the features and only kept
+# aside in order to filter the data points
+# NOTE(review): 'data' is defined earlier in the script, outside of the lines visible here
+data_x = data.copy()
+data_y = data_x.pop("Years_employed").copy().to_frame()
+target_values = data_x.pop("Target").copy()
+# Get all data points whose target value is True (credit card has been approved)
+approved_mask = target_values == 1
+data_x_approved = data_x[approved_mask]
+data_y_approved = data_y[approved_mask]
+# Get data from all parties
+data_user = data_x_approved[USER_COLUMNS].copy()
+data_bank = data_x_approved[BANK_COLUMNS].copy()
+data_third_party = data_x_approved[EXPLAIN_THIRD_PARTY_COLUMNS].copy()
+# The user and bank pre-processors are only applied here (transform, no fitting), while the
+# third party data is used as raw values
+preprocessed_data_user = pre_processor_user.transform(data_user)
+preprocessed_data_bank = pre_processor_bank.transform(data_bank)
+preprocessed_data_third_party = data_third_party.to_numpy()
+# Concatenate the features in the user, bank, third party order
+preprocessed_data_x = numpy.concatenate((preprocessed_data_user, preprocessed_data_bank, preprocessed_data_third_party), axis=1)
+model_explain = MultiInputDecisionTreeRegressor()
+model_explain, sklearn_model_explain = model_explain.fit_benchmark(preprocessed_data_x, data_y_approved)
+# Split the data back into one input per party using the 'explain' slices, then compile the model
+# with all three inputs encrypted
+multi_inputs_train = get_multi_inputs(preprocessed_data_x, is_approval=False)
+model_explain.compile(*multi_inputs_train, inputs_encryption_status=["encrypted", "encrypted", "encrypted"])
+# Delete the deployment folder and its content if it already exists
+if EXPLAIN_DEPLOYMENT_PATH.is_dir():
+    shutil.rmtree(EXPLAIN_DEPLOYMENT_PATH)
+# Save files needed for deployment (and enable cross-platform deployment)
+fhe_model_dev_explain = MultiInputsFHEModelDev(EXPLAIN_DEPLOYMENT_PATH, model_explain)
+fhe_model_dev_explain.save(via_mlir=True)
 print("\nDone !")

server.py CHANGED Viewed

@@ -5,11 +5,11 @@ from typing import List, Optional
 from fastapi import FastAPI, File, Form, UploadFile
 from fastapi.responses import JSONResponse, Response
-from settings import DEPLOYMENT_PATH, SERVER_FILES, CLIENT_TYPES
 from utils.client_server_interface import MultiInputsFHEModelServer
 # Load the server
-FHE_SERVER = MultiInputsFHEModelServer(DEPLOYMENT_PATH)
 def _get_server_file_path(name, client_id, client_type=None):

 from fastapi import FastAPI, File, Form, UploadFile
 from fastapi.responses import JSONResponse, Response
+from settings import APPROVAL_DEPLOYMENT_PATH, SERVER_FILES, CLIENT_TYPES
 from utils.client_server_interface import MultiInputsFHEModelServer
 # Load the server
+FHE_SERVER = MultiInputsFHEModelServer(APPROVAL_DEPLOYMENT_PATH)
 def _get_server_file_path(name, client_id, client_type=None):

settings.py CHANGED Viewed

@@ -6,12 +6,16 @@ import pandas
 # The directory of this project
 REPO_DIR = Path(__file__).parent
-# This repository's main necessary directories
 DEPLOYMENT_PATH = REPO_DIR / "deployment_files"
 FHE_KEYS = REPO_DIR / ".fhe_keys"
 CLIENT_FILES = REPO_DIR / "client_files"
 SERVER_FILES = REPO_DIR / "server_files"
 # Path targeting pre-processor saved files
 PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / 'pre_processor_user.pkl'
 PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH / 'pre_processor_bank.pkl'
@@ -29,7 +33,8 @@ SERVER_URL = "http://localhost:8000/"
 DATA_PATH = "data/data.csv"
 # Development settings
-PROCESSED_INPUT_SHAPE = (1, 39)
 CLIENT_TYPES = ["user", "bank", "third_party"]
 INPUT_INDEXES = {
@@ -37,19 +42,26 @@ INPUT_INDEXES = {
     "bank": 1,
     "third_party": 2,
 }
-INPUT_SLICES = {
     "user": slice(0, 36),  # First position: start from 0
     "bank": slice(36, 37),  # Second position: start from n_feature_user
     "third_party": slice(37, 39),  # Third position: start from n_feature_user + n_feature_bank
 }
 USER_COLUMNS = [
     'Own_car', 'Own_property', 'Mobile_phone', 'Num_children', 'Household_size',
     'Total_income', 'Age', 'Income_type', 'Education_type', 'Family_status', 'Housing_type',
     'Occupation_type',
 ]
 BANK_COLUMNS = ["Account_age"]
-THIRD_PARTY_COLUMNS = ["Years_employed", "Employed"]
 _data = pandas.read_csv(DATA_PATH, encoding="utf-8")

 # The directory of this project
 REPO_DIR = Path(__file__).parent
+# Main necessary directories
 DEPLOYMENT_PATH = REPO_DIR / "deployment_files"
 FHE_KEYS = REPO_DIR / ".fhe_keys"
 CLIENT_FILES = REPO_DIR / "client_files"
 SERVER_FILES = REPO_DIR / "server_files"
+# All deployment directories
+APPROVAL_DEPLOYMENT_PATH = DEPLOYMENT_PATH / "approval_model"
+EXPLAIN_DEPLOYMENT_PATH = DEPLOYMENT_PATH / "explain_model"
 # Path targeting pre-processor saved files
 PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / 'pre_processor_user.pkl'
 PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH / 'pre_processor_bank.pkl'
 DATA_PATH = "data/data.csv"
 # Development settings
+APPROVAL_PROCESSED_INPUT_SHAPE = (1, 39)
+EXPLAIN_PROCESSED_INPUT_SHAPE = (1, 38)
 CLIENT_TYPES = ["user", "bank", "third_party"]
 INPUT_INDEXES = {
     "bank": 1,
     "third_party": 2,
 }
+APPROVAL_INPUT_SLICES = {
     "user": slice(0, 36),  # First position: start from 0
     "bank": slice(36, 37),  # Second position: start from n_feature_user
     "third_party": slice(37, 39),  # Third position: start from n_feature_user + n_feature_bank
 }
+EXPLAIN_INPUT_SLICES = {
+    "user": slice(0, 36),  # First position: start from 0
+    "bank": slice(36, 37),  # Second position: start from n_feature_user
+    "third_party": slice(37, 38),  # Third position: start from n_feature_user + n_feature_bank
+}
+# Fix column order for pre-processing steps
 USER_COLUMNS = [
     'Own_car', 'Own_property', 'Mobile_phone', 'Num_children', 'Household_size',
     'Total_income', 'Age', 'Income_type', 'Education_type', 'Family_status', 'Housing_type',
     'Occupation_type',
 ]
 BANK_COLUMNS = ["Account_age"]
+APPROVAL_THIRD_PARTY_COLUMNS = ["Years_employed", "Employed"]
+EXPLAIN_THIRD_PARTY_COLUMNS = ["Employed"]
 _data = pandas.read_csv(DATA_PATH, encoding="utf-8")

utils/client_server_interface.py CHANGED Viewed

@@ -3,10 +3,11 @@
 import numpy
 import copy
-from concrete.fhe import Value, EvaluationKeys
 from concrete.ml.deployment.fhe_client_server import FHEModelClient, FHEModelDev, FHEModelServer
-from concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier
 class MultiInputsFHEModelDev(FHEModelDev):
@@ -15,8 +16,9 @@ class MultiInputsFHEModelDev(FHEModelDev):
         super().__init__(*arg, **kwargs)
         model = copy.copy(self.model)
-        model.__class__ = ConcreteXGBClassifier
         self.model = model
@@ -30,10 +32,27 @@ class MultiInputsFHEModelClient(FHEModelClient):
     def quantize_encrypt_serialize_multi_inputs(
         self,
         x: numpy.ndarray,
-        input_index,
-        processed_input_shape,
-        input_slice
     ) -> bytes:
         x_padded = numpy.zeros(processed_input_shape)
@@ -58,15 +77,15 @@ class MultiInputsFHEModelServer(FHEModelServer):
     def run(
         self,
-        *serialized_encrypted_quantized_data: bytes,
         serialized_evaluation_keys: bytes,
     ) -> bytes:
-        """Run the model on the server over encrypted data.
         Args:
-            serialized_encrypted_quantized_data (bytes): the encrypted, quantized
-                and serialized data
-            serialized_evaluation_keys (bytes): the serialized evaluation keys
         Returns:
             bytes: the result of the model

 import numpy
 import copy
+from typing import Tuple
+from concrete.fhe import Value, EvaluationKeys
 from concrete.ml.deployment.fhe_client_server import FHEModelClient, FHEModelDev, FHEModelServer
+from concrete.ml.sklearn import DecisionTreeClassifier
 class MultiInputsFHEModelDev(FHEModelDev):
         super().__init__(*arg, **kwargs)
+        # Workaround that enables loading a modified version of a DecisionTreeClassifier model
         model = copy.copy(self.model)
+        model.__class__ = DecisionTreeClassifier
         self.model = model
     def quantize_encrypt_serialize_multi_inputs(
         self,
         x: numpy.ndarray,
+        input_index: int,
+        processed_input_shape: Tuple[int],
+        input_slice: slice,
     ) -> bytes:
+        """Quantize, encrypt and serialize inputs for a multi-party model.
+        In the following, the 'quantize_input' method called is the one defined in Concrete ML's
+        built-in models. Since they don't natively handle inputs for multi-party models, we need
+        to use padding along with indexing and slicing so that inputs from a specific party are correctly
+        associated with input quantizers.
+        Args:
+            x (numpy.ndarray): The input to consider. Here, the input should only represent a
+                single party.
+            input_index (int): The index representing the type of client (0: "user", 1: "bank",
+                2: "third_party").
+            processed_input_shape (Tuple[int]): The total input shape (all parties combined) after
+                pre-processing.
+            input_slice (slice): The slice to consider for the given party.
+        """
         x_padded = numpy.zeros(processed_input_shape)
     def run(
         self,
+        *serialized_encrypted_quantized_data: Tuple[bytes],
         serialized_evaluation_keys: bytes,
     ) -> bytes:
+        """Run the model on the server over encrypted data for a multi-party model.
         Args:
+            serialized_encrypted_quantized_data (Tuple[bytes]): The encrypted, quantized
+                and serialized data for a multi-party model.
+            serialized_evaluation_keys (bytes): The serialized evaluation keys.
         Returns:
             bytes: the result of the model

utils/model.py CHANGED Viewed

@@ -13,7 +13,7 @@ from concrete.ml.common.utils import (
     check_there_is_no_p_error_options_in_configuration
 )
 from concrete.ml.quantization.quantized_module import QuantizedModule, _get_inputset_generator
-from concrete.ml.sklearn import DecisionTreeClassifier
 class MultiInputModel:
@@ -131,46 +131,8 @@ class MultiInputModel:
         return compiler
-    def predict_multi_inputs(self, *multi_inputs, simulate=True):
-        """Run the inference with multiple inputs, with simulation or in FHE."""
-        assert all(isinstance(inputs, numpy.ndarray) for inputs in multi_inputs)
-        if not simulate:
-            self.fhe_circuit.keygen()
-        y_preds = []
-        execution_times = []
-        for inputs in zip(*multi_inputs):
-            inputs = tuple(numpy.expand_dims(input, axis=0) for input in inputs)
-            q_inputs = self.quantize_input(*inputs)
-            if simulate:
-                q_y_proba = self.fhe_circuit.simulate(*q_inputs)
-            else:
-                q_inputs_enc = self.fhe_circuit.encrypt(*q_inputs)
-                start = time.time()
-                q_y_proba_enc = self.fhe_circuit.run(*q_inputs_enc)
-                end = time.time() - start
-                execution_times.append(end)
-                q_y_proba = self.fhe_circuit.decrypt(q_y_proba_enc)
-            y_proba = self.dequantize_output(q_y_proba)
-            y_proba = self.post_processing(y_proba)
-            y_pred = numpy.argmax(y_proba, axis=1)
-            y_preds.append(y_pred)
-        if not simulate:
-            print(f"FHE execution time per inference: {numpy.mean(execution_times) :.2}s")
-        return numpy.array(y_preds)
 class MultiInputDecisionTreeClassifier(MultiInputModel, DecisionTreeClassifier):
-    pass

     check_there_is_no_p_error_options_in_configuration
 )
 from concrete.ml.quantization.quantized_module import QuantizedModule, _get_inputset_generator
+from concrete.ml.sklearn import DecisionTreeClassifier, DecisionTreeRegressor
 class MultiInputModel:
         return compiler
 class MultiInputDecisionTreeClassifier(MultiInputModel, DecisionTreeClassifier):
+    pass
+class MultiInputDecisionTreeRegressor(MultiInputModel, DecisionTreeRegressor):
+    pass