File size: 6,764 Bytes
cfb7e96 d7d19eb d8778c1 d7d19eb d8778c1 d7d19eb d8778c1 d7d19eb d8778c1 d7d19eb c8c87e8 6a1ae66 ce9d8f3 4927bca be78f75 4864a65 00bad79 bfb3e10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False INFO:__main__:Training/evaluation parameters TrainingArguments( _n_gpu=1, accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False}, adafactor=False, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=False, batch_eval_metrics=False, bf16=False, bf16_full_eval=False, data_seed=None, dataloader_drop_last=False, dataloader_num_workers=0, dataloader_persistent_workers=False, dataloader_pin_memory=True, dataloader_prefetch_factor=None, ddp_backend=None, ddp_broadcast_buffers=None, ddp_bucket_cap_mb=None, ddp_find_unused_parameters=None, ddp_timeout=1800, debug=[], deepspeed=None, disable_tqdm=False, dispatch_batches=None, do_eval=True, do_predict=False, do_train=True, eval_accumulation_steps=None, eval_delay=0, eval_do_concat_batches=True, eval_on_start=False, eval_steps=None, eval_strategy=IntervalStrategy.EPOCH, eval_use_gather_object=False, evaluation_strategy=epoch, fp16=False, fp16_backend=auto, fp16_full_eval=False, fp16_opt_level=O1, fsdp=[], fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap=None, full_determinism=False, gradient_accumulation_steps=1, gradient_checkpointing=False, gradient_checkpointing_kwargs=None, greater_is_better=False, group_by_length=False, half_precision_backend=auto, hub_always_push=False, hub_model_id=None, hub_private_repo=False, hub_strategy=HubStrategy.EVERY_SAVE, hub_token=<HUB_TOKEN>, ignore_data_skip=False, include_inputs_for_metrics=False, include_num_input_tokens_seen=False, include_tokens_per_second=False, jit_mode_eval=False, label_names=None, label_smoothing_factor=0.0, learning_rate=5e-05, length_column_name=length, load_best_model_at_end=True, local_rank=0, log_level=passive, log_level_replica=warning, log_on_each_node=True, logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_dj/runs/Sep01_13-56-27_lmgpu-node-07, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=500, logging_strategy=IntervalStrategy.EPOCH, lr_scheduler_kwargs={}, lr_scheduler_type=SchedulerType.LINEAR, max_grad_norm=1.0, max_steps=-1, metric_for_best_model=loss, mp_parameters=, neftune_noise_alpha=None, no_cuda=False, num_train_epochs=20.0, optim=OptimizerNames.ADAMW_TORCH, optim_args=None, optim_target_modules=None, output_dir=/home/iais_marenpielka/Bouthaina/res_nw_dj, overwrite_output_dir=False, past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, prediction_loss_only=False, push_to_hub=True, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=<PUSH_TO_HUB_TOKEN>, ray_scope=last, remove_unused_columns=True, report_to=[], restore_callback_states_from_checkpoint=False, resume_from_checkpoint=None, run_name=/home/iais_marenpielka/Bouthaina/res_nw_dj, save_on_each_node=False, save_only_model=False, save_safetensors=True, save_steps=500, save_strategy=IntervalStrategy.EPOCH, save_total_limit=None, seed=42, skip_memory_metrics=True, split_batches=None, tf32=None, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, torch_empty_cache_steps=None, torchdynamo=None, tpu_metrics_debug=False, tpu_num_cores=None, use_cpu=False, use_ipex=False, use_legacy_prediction_loop=False, use_mps_device=False, warmup_ratio=0.0, warmup_steps=500, weight_decay=0.0, ) INFO:datasets.builder:Using custom data configuration default-70891baac37034df INFO:datasets.info:Loading Dataset Infos from /home/iais_marenpielka/Bouthaina/miniconda3/lib/python3.12/site-packages/datasets/packaged_modules/text INFO:datasets.builder:Overwrite dataset info from restored data version if exists. INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-70891baac37034df/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101 INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-70891baac37034df/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101) INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-70891baac37034df/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101 INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-70891baac37034df/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-0c0084b2e51c93f9.arrow INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-70891baac37034df/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-77487910604ea5b4.arrow WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx. INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-70891baac37034df/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-f0985db2addeab61.arrow INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-70891baac37034df/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-555eefcdfe4bb35c.arrow WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. INFO:root:Epoch 1.0: Train Loss = None, Eval Loss = None INFO:absl:Using default tokenizer. INFO:root:Epoch 2.0: Train Loss = 1.2336, Eval Loss = 0.7061845660209656 INFO:absl:Using default tokenizer. INFO:root:Epoch 3.0: Train Loss = 0.634, Eval Loss = 0.6422649025917053 INFO:absl:Using default tokenizer. INFO:root:Epoch 4.0: Train Loss = 0.5299, Eval Loss = 0.6245766282081604 INFO:absl:Using default tokenizer. INFO:root:Epoch 5.0: Train Loss = 0.4492, Eval Loss = 0.6246171593666077 INFO:absl:Using default tokenizer. INFO:root:Epoch 6.0: Train Loss = 0.3829, Eval Loss = 0.6300457715988159 INFO:absl:Using default tokenizer. INFO:root:Epoch 7.0: Train Loss = 0.328, Eval Loss = 0.6383510231971741 INFO:absl:Using default tokenizer. INFO:root:Epoch 8.0: Train Loss = 0.2832, Eval Loss = 0.6490957736968994 INFO:absl:Using default tokenizer. |