{
"tokenizer_path_name": null,
"vocab_name": "bio-clinical",
"tokenizer": "bbpe-roberta",
"lowercase": false,
"vocab_size": 50262,
"min_frequency": 6,
"extra_tokens": [],
"limit_alphabet": 1000,
"max_len": 512,
"no_show_progress": false,
"strip_accents": false,
"no_handle_chinese_chars": false,
"no_clean_text": false,
"reserve_tokens": 0,
"use_tokenizers": false,
"no_fairseq": false,
"bbpe_add_prefix_space": true,
"single_paragraph_add_punct": true,
"tok_batch_size": 100000000,
"files": [
"/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/train.txt",
"/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/valid.txt",
"/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/test.txt"
],
"output_root_path": "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e",
"commit_hash": "d1d3920e7012caf14c9d6968fded36e0dd719a51"
}