# Source repository: crystal-technologies/CRYSTAL-R1
# Commit 2d8da09 ("Upload 1287 files"), about 1 year ago; file size 1.01 kB

	table_structure:
	- name: col_a
	code_type: float
	args:
	code_len: 4 # number of tokens used to code the column
	base: 16 # the positional base number. ie. it uses 16 tokens for one digit
	fillall: False # whether to use full base number for each token or derive it from the data.
	hasnan: False # can it handles nan or not
	transform: yeo-johnson # can be ['yeo-johnson', 'quantile', 'robust'], check https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
	- name: col_b
	code_type: float
	args:
	code_len: 4
	base: 32
	fillall: True
	hasnan: True
	transform: quantile
	- name: col_c
	code_type: int
	args:
	code_len: 3
	base: 12
	fillall: True
	hasnan: True
	- name: col_d
	code_type: category
	args:
	code_len: 3
	base: 12
	fillall: True
	hasnan: True
	tokenizer_file: ??? # tabular tokneizer output file path
	table_csv_file: ??? # input table csv file