crystal-technologies's picture
Upload 1287 files
2d8da09
raw
history blame
1.01 kB
# Per-column coding schema for the table: one list entry per column, giving
# the column name, its code type, and the arguments passed to that column's
# coder.
table_structure:
  - name: col_a
    code_type: float
    args:
      code_len: 4  # number of tokens used to code the column
      base: 16  # positional base, i.e. 16 distinct tokens are used per digit
      # whether to use the full base number for each token (true) or derive
      # it from the data (false)
      fillall: false
      hasnan: false  # whether the column coder handles NaN values
      # one of ['yeo-johnson', 'quantile', 'robust']; see
      # https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
      transform: yeo-johnson
  - name: col_b
    code_type: float
    args:
      code_len: 4
      base: 32
      fillall: true
      hasnan: true
      transform: quantile
  - name: col_c
    code_type: int
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true
  - name: col_d
    code_type: category
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true
# NOTE(review): `???` looks like the OmegaConf/Hydra mandatory-value
# placeholder, i.e. both paths must be supplied by the caller — confirm
# against the loader that consumes this file.
# Output file path for the tabular tokenizer.
tokenizer_file: ???
# Input table CSV file.
table_csv_file: ???