"All constants used in the project."
from pathlib import Path
import pandas
# The directory of this project
REPO_DIR = Path(__file__).parent
# Main necessary directories
DEPLOYMENT_PATH = REPO_DIR / "deployment_files"
FHE_KEYS = REPO_DIR / ".fhe_keys"
CLIENT_FILES = REPO_DIR / "client_files"
SERVER_FILES = REPO_DIR / "server_files"
# All deployment directories
DEPLOYMENT_PATH = DEPLOYMENT_PATH / "model"
# Path targeting pre-processor saved files
PRE_PROCESSOR_APPLICANT_PATH = DEPLOYMENT_PATH / 'pre_processor_applicant.pkl'
PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH / 'pre_processor_bank.pkl'
PRE_PROCESSOR_CREDIT_BUREAU_PATH = DEPLOYMENT_PATH / 'pre_processor_credit_bureau.pkl'
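# Illustrative sketch (not part of the original file): the pre-processors saved at the
# paths above are assumed to be pickled, fitted transformers; a loader such as the
# hypothetical helper below could restore them at run time.
def _load_pre_processor(path):
    """Load a pickled pre-processor from the given path (hypothetical helper)."""
    import pickle

    with path.open("rb") as pre_processor_file:
        return pickle.load(pre_processor_file)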
# Create the necessary directories
FHE_KEYS.mkdir(exist_ok=True)
CLIENT_FILES.mkdir(exist_ok=True)
SERVER_FILES.mkdir(exist_ok=True)
# Store the server's URL
SERVER_URL = "http://localhost:8000/"
# Path to data file
DATA_PATH = "data/data.csv"
# Development settings
PROCESSED_INPUT_SHAPE = (1, 39)
CLIENT_TYPES = ["applicant", "bank", "credit_bureau"]
INPUT_INDEXES = {
    "applicant": 0,
    "bank": 1,
    "credit_bureau": 2,
}
INPUT_SLICES = {
    "applicant": slice(0, 36),  # First position: start from 0
    "bank": slice(36, 37),  # Second position: start from n_feature_applicant
    "credit_bureau": slice(37, 39),  # Third position: start from n_feature_applicant + n_feature_bank
}
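# Illustrative sketch (not part of the original file): the slices above describe how the
# three clients' pre-processed features are laid out in the single processed input of
# shape PROCESSED_INPUT_SHAPE. The hypothetical helper below only documents that layout,
# assuming each client provides a (1, n_features) NumPy array.
def _assemble_processed_input(applicant_array, bank_array, credit_bureau_array):
    """Concatenate the three clients' pre-processed arrays into one processed input (hypothetical helper)."""
    import numpy

    processed_input = numpy.zeros(PROCESSED_INPUT_SHAPE)
    processed_input[:, INPUT_SLICES["applicant"]] = applicant_array
    processed_input[:, INPUT_SLICES["bank"]] = bank_array
    processed_input[:, INPUT_SLICES["credit_bureau"]] = credit_bureau_array
    return processed_input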
# Fixed column order used by the pre-processing steps
APPLICANT_COLUMNS = [
    'Own_car', 'Own_property', 'Mobile_phone', 'Num_children', 'Household_size',
    'Total_income', 'Age', 'Income_type', 'Education_type', 'Family_status', 'Housing_type',
    'Occupation_type',
]
BANK_COLUMNS = ["Account_age"]
CREDIT_BUREAU_COLUMNS = ["Years_employed", "Employed"]
_data = pandas.read_csv(DATA_PATH, encoding="utf-8")
def get_min_max(data, column):
    """Get the min/max values of a column, to be passed to Gradio components as keyword arguments."""
    return {
        "minimum": int(data[column].min()),
        "maximum": int(data[column].max()),
    }
# App data min and max values
ACCOUNT_MIN_MAX = get_min_max(_data, "Account_age")
CHILDREN_MIN_MAX = get_min_max(_data, "Num_children")
INCOME_MIN_MAX = get_min_max(_data, "Total_income")
AGE_MIN_MAX = get_min_max(_data, "Age")
FAMILY_MIN_MAX = get_min_max(_data, "Household_size")
# Default values
INCOME_VALUE = 12000
AGE_VALUE = 30
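# Illustrative sketch (not part of the original file): the min/max dictionaries are meant
# to be unpacked as keyword arguments when building Gradio input components, as hinted by
# get_min_max's docstring. The helper below is hypothetical; the actual components live in
# the app module.
def _build_age_slider():
    """Build a Gradio slider for the Age input from the constants above (hypothetical helper)."""
    import gradio

    return gradio.Slider(**AGE_MIN_MAX, value=AGE_VALUE, label="Age")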
# App data choices
INCOME_TYPES = list(_data["Income_type"].unique())
OCCUPATION_TYPES = list(_data["Occupation_type"].unique())
HOUSING_TYPES = list(_data["Housing_type"].unique())
EDUCATION_TYPES = list(_data["Education_type"].unique())
FAMILY_STATUS = list(_data["Family_status"].unique())
YEARS_EMPLOYED_BINS = ['0-2', '2-5', '5-8', '8-11', '11-18', '18+']
# Map each Years_employed bin name to its position in the bin order
YEARS_EMPLOYED_BIN_NAME_TO_INDEX = {bin_name: i for i, bin_name in enumerate(YEARS_EMPLOYED_BINS)}
assert len(YEARS_EMPLOYED_BINS) == len(list(_data["Years_employed"].unique())), (
    "Years_employed bins do not match the unique values found in the data"
)
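# Illustrative sketch (not part of the original file): Years_employed is exposed in the UI
# as one of the bin names above and is assumed to be converted to its ordinal index before
# pre-processing. The helper below is hypothetical.
def _years_employed_bin_to_index(bin_name):
    """Map a Years_employed bin name (for example, '5-8' -> 2) to its index (hypothetical helper)."""
    return YEARS_EMPLOYED_BIN_NAME_TO_INDEX[bin_name]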