---
# Run configuration snapshot. Apart from `wandb_version`, every top-level key
# maps to a `desc` / `value` pair; `desc` is null for every entry here.
wandb_version: 1

# Run metadata (client, framework and interpreter versions, entry script,
# start time). Version strings are quoted so they always load as strings.
_wandb:
  desc: null
  value:
    cli_version: '0.12.9'
    code_path: code/run_mlm_flax.py
    framework: huggingface
    huggingface_version: '4.16.0.dev0'
    is_jupyter_run: false
    is_kaggle_kernel: false
    python_version: '3.8.10'
    start_time: 1642687009
    # NOTE(review): `t` looks like integer-keyed telemetry written by the
    # client; entries 4, 5 and 6 repeat the python, cli and huggingface
    # versions above. The meaning of the other codes is not visible here.
    t:
      1:
        - 11
        - 12
      2:
        - 11
        - 12
      4: '3.8.10'
      5: '0.12.9'
      6: '4.16.0.dev0'
      8:
        - 5

# Optimizer settings.
adafactor:
  desc: null
  value: false
adam_beta1:
  desc: null
  value: 0.9
adam_beta2:
  desc: null
  value: 0.98
adam_epsilon:
  desc: null
  value: 1.0e-06

# Model, dataset and training arguments (alphabetical order).
cache_dir:
  desc: null
  value: null
config_name:
  desc: null
  value: ./
dataset_config_name:
  desc: null
  value: null
dataset_name:
  desc: null
  value: NbAiLab/NCC
do_eval:
  desc: null
  value: true
do_train:
  desc: null
  value: true
dtype:
  desc: null
  value: bfloat16
eval_steps:
  desc: null
  value: 1000
hub_model_id:
  desc: null
  value: null
hub_token:
  desc: null
  value: null
learning_rate:
  desc: null
  value: 0.00015
line_by_line:
  desc: null
  value: false
logging_steps:
  desc: null
  value: 1000
max_seq_length:
  desc: null
  value: 512
mlm_probability:
  desc: null
  value: 0.15
model_name_or_path:
  desc: null
  value: versae/roberta-base-ncc
model_type:
  desc: null
  value: roberta
num_train_epochs:
  desc: null
  value: 3.0
output_dir:
  desc: null
  value: ./
overwrite_cache:
  desc: null
  value: false
overwrite_output_dir:
  desc: null
  value: true
pad_to_max_length:
  desc: null
  value: true
per_device_eval_batch_size:
  desc: null
  value: 46
per_device_train_batch_size:
  desc: null
  value: 46
preprocessing_num_workers:
  desc: null
  value: null
push_to_hub:
  desc: null
  value: true
save_steps:
  desc: null
  value: 1000
seed:
  desc: null
  value: 42
tokenizer_name:
  desc: null
  value: ./
train_file:
  desc: null
  value: null
train_ref_file:
  desc: null
  value: null
use_fast_tokenizer:
  desc: null
  value: true
validation_file:
  desc: null
  value: null
validation_ref_file:
  desc: null
  value: null
validation_split_percentage:
  desc: null
  value: 5
warmup_steps:
  desc: null
  value: 1000
weight_decay:
  desc: null
  value: 0.01