Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,854 Bytes
c119738 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
"A script to generate all development files necessary for the project."
import shutil
import numpy
import pandas
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from ..settings import DEPLOYMENT_PATH, RANDOM_STATE
from client_server_interface import MultiInputsFHEModelDev
from model import MultiInputXGBClassifier
from development.pre_processing import pre_process_data
# End-to-end development pipeline: load the cleaned data set, pre-process it,
# balance the classes, train and compile the multi-input model, then write the
# deployment artifacts to DEPLOYMENT_PATH.
print("Load and pre-process the data")

data = pandas.read_csv("data/clean_data.csv", encoding="utf-8")

# Make median annual salary similar to France (2023): from 157500 to 22050
# (157500 * 0.14 == 22050)
data["Total_income"] = data["Total_income"] * 0.14

# Remove ID feature
data.drop("ID", axis=1, inplace=True)

# Feature engineer the data. The second returned value (training_bins) is not
# used further in this script.
pre_processed_data, training_bins = pre_process_data(data)

# Define input and target data. `pop` removes the "Target" column in place, so
# `x` refers to the remaining feature columns only.
y = pre_processed_data.pop("Target")
x = pre_processed_data

# The initial data-set is very imbalanced: use SMOTE to get better results.
# Seed the over-sampler with the same RANDOM_STATE as the split below so that
# the generated samples, and therefore the trained model and the saved
# deployment files, are reproducible from one run to the next.
x, y = SMOTE(random_state=RANDOM_STATE).fit_resample(x, y)

# Retrieve the training data (the held-out 30% is not needed in this script)
X_train, _, y_train, _ = train_test_split(
    x, y, stratify=y, test_size=0.3, random_state=RANDOM_STATE
)

# Convert the Pandas data frames into Numpy arrays
X_train_np = X_train.to_numpy()
y_train_np = y_train.to_numpy()

print("Train and compile the model")

model = MultiInputXGBClassifier(max_depth=3, n_estimators=40)
model.fit(X_train_np, y_train_np)

# Split the feature columns into three groups, one per model input, and compile
# with every input marked as encrypted.
multi_inputs_train = numpy.array_split(X_train_np, 3, axis=1)
model.compile(*multi_inputs_train, inputs_encryption_status=["encrypted", "encrypted", "encrypted"])

# Delete the deployment folder and its content if it already exists
if DEPLOYMENT_PATH.is_dir():
    shutil.rmtree(DEPLOYMENT_PATH)

print("Save deployment files")

# Save the files needed for deployment
fhe_dev = MultiInputsFHEModelDev(model, DEPLOYMENT_PATH)
fhe_dev.save()

print("Done !")
|