# Column schema for the tabular tokenizer: one entry per table column,
# giving its name, how it is coded (`code_type`) and the coding arguments.
table_structure:
  - name: col_a
    code_type: float
    args:
      # Number of tokens used to code the column.
      code_len: 4
      # Positional base, i.e. 16 distinct tokens are used for one digit.
      base: 16
      # Whether to use the full base number for each token or derive it
      # from the data.
      fillall: false
      # Whether the column can handle NaN values.
      hasnan: false
      # One of ['yeo-johnson', 'quantile', 'robust']; see
      # https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
      transform: yeo-johnson

  - name: col_b
    code_type: float
    args:
      code_len: 4
      base: 32
      fillall: true
      hasnan: true
      transform: quantile

  - name: col_c
    code_type: int
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true

  - name: col_d
    code_type: category
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true

# NOTE(review): `???` looks like a required-value placeholder that must be
# overridden by the caller (presumably OmegaConf/Hydra) - confirm.
tokenizer_file: ???  # tabular tokenizer output file path
table_csv_file: ???  # input table csv file