File size: 9,138 Bytes
25285c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e64ba8b
25285c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e64ba8b
25285c4
e64ba8b
 
25285c4
 
 
 
 
e64ba8b
 
 
25285c4
e64ba8b
 
25285c4
 
 
e64ba8b
7e84a34
98ffb58
 
 
 
 
 
7b6fdda
 
 
 
 
 
 
 
 
 
 
 
 
 
c173919
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=IntervalStrategy.EPOCH,
eval_use_gather_object=False,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
gradient_checkpointing_kwargs=None,
greater_is_better=False,
group_by_length=False,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,
include_num_input_tokens_seen=False,
include_tokens_per_second=False,
jit_mode_eval=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=5e-05,
length_column_name=length,
load_best_model_at_end=True,
local_rank=0,
log_level=passive,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_yem/runs/Sep01_15-13-26_lmgpu-node-07,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_steps=500,
logging_strategy=IntervalStrategy.EPOCH,
lr_scheduler_kwargs={},
lr_scheduler_type=SchedulerType.LINEAR,
max_grad_norm=1.0,
max_steps=-1,
metric_for_best_model=loss,
mp_parameters=,
neftune_noise_alpha=None,
no_cuda=False,
num_train_epochs=20.0,
optim=OptimizerNames.ADAMW_TORCH,
optim_args=None,
optim_target_modules=None,
output_dir=/home/iais_marenpielka/Bouthaina/res_nw_yem,
overwrite_output_dir=False,
past_index=-1,
per_device_eval_batch_size=8,
per_device_train_batch_size=8,
prediction_loss_only=False,
push_to_hub=True,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=True,
report_to=[],
restore_callback_states_from_checkpoint=False,
resume_from_checkpoint=None,
run_name=/home/iais_marenpielka/Bouthaina/res_nw_yem,
save_on_each_node=False,
save_only_model=False,
save_safetensors=True,
save_steps=500,
save_strategy=IntervalStrategy.EPOCH,
save_total_limit=None,
seed=42,
skip_memory_metrics=True,
split_batches=None,
tf32=None,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torch_empty_cache_steps=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_mps_device=False,
warmup_ratio=0.0,
warmup_steps=500,
weight_decay=0.0,
)
INFO:datasets.builder:Using custom data configuration default-7d1e1bd6ffb527f0
INFO:datasets.info:Loading Dataset Infos from /home/iais_marenpielka/Bouthaina/miniconda3/lib/python3.12/site-packages/datasets/packaged_modules/text
INFO:datasets.builder:Generating dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-7d1e1bd6ffb527f0/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
INFO:datasets.builder:Downloading and preparing dataset text/default to /home/iais_marenpielka/.cache/huggingface/datasets/text/default-7d1e1bd6ffb527f0/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101...
INFO:datasets.download.download_manager:Downloading took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min
INFO:datasets.builder:Generating train split
INFO:datasets.builder:Generating validation split
INFO:datasets.utils.info_utils:Unable to verify splits sizes.
INFO:datasets.builder:Dataset text downloaded and prepared to /home/iais_marenpielka/.cache/huggingface/datasets/text/default-7d1e1bd6ffb527f0/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101. Subsequent calls will reuse this data.
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-7d1e1bd6ffb527f0/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-c86dc1fa59adefd8.arrow
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-7d1e1bd6ffb527f0/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-996802700aec694a.arrow
WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-7d1e1bd6ffb527f0/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-757314bea7c4cdbc.arrow
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-7d1e1bd6ffb527f0/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-9b49241998bcfbce.arrow
WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
INFO:root:Epoch 1.0: Train Loss = None, Eval Loss = None
INFO:absl:Using default tokenizer.
INFO:root:Epoch 2.0: Train Loss = 7.4125, Eval Loss = 2.3318939208984375
INFO:absl:Using default tokenizer.
INFO:root:Epoch 3.0: Train Loss = 1.0077, Eval Loss = 0.5519431233406067
INFO:absl:Using default tokenizer.
INFO:root:Epoch 4.0: Train Loss = 0.483, Eval Loss = 0.4972667694091797
INFO:absl:Using default tokenizer.
INFO:root:Epoch 5.0: Train Loss = 0.3795, Eval Loss = 0.4739198088645935
INFO:absl:Using default tokenizer.
INFO:root:Epoch 6.0: Train Loss = 0.2901, Eval Loss = 0.46911630034446716
INFO:absl:Using default tokenizer.
INFO:root:Epoch 7.0: Train Loss = 0.2239, Eval Loss = 0.465503990650177
INFO:absl:Using default tokenizer.
INFO:root:Epoch 8.0: Train Loss = 0.1763, Eval Loss = 0.46726807951927185
INFO:absl:Using default tokenizer.
INFO:root:Epoch 9.0: Train Loss = 0.1429, Eval Loss = 0.46664053201675415
INFO:absl:Using default tokenizer.
WARNING:huggingface_hub.utils._http:'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/4e/c5/4ec5c12bd4b31fb218515ee480d86e26419d948a9a596a06a6ad09ce77d37e3a/c52fd49ac31e5e995d8cb1700c2c8b272c7ee8f89a893eb7b460984d167687cc?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20240901%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240901T153010Z&X-Amz-Expires=86400&X-Amz-Signature=00c6ff133629912a0a6b5b4c79dac4cfb2cb3e5a393d8142d19ac9836152469e&X-Amz-SignedHeaders=host&partNumber=27&uploadId=IIqkH7fFY3bWTLeztTG6lP1VBL_yNebxVqEQKh1NFhce4EErBOOV6kEWg53qo6At_KkjssPuSOeAw0iZNAGF9P8NvFgZBejiNwczhYiT_FROh7cabpyEmvO6Nez4.zL4&x-id=UploadPart (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2406)')))"), '(Request ID: 5306a365-2456-4b12-80b4-c53a02646f19)')' thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/4e/c5/4ec5c12bd4b31fb218515ee480d86e26419d948a9a596a06a6ad09ce77d37e3a/c52fd49ac31e5e995d8cb1700c2c8b272c7ee8f89a893eb7b460984d167687cc?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20240901%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240901T153010Z&X-Amz-Expires=86400&X-Amz-Signature=00c6ff133629912a0a6b5b4c79dac4cfb2cb3e5a393d8142d19ac9836152469e&X-Amz-SignedHeaders=host&partNumber=27&uploadId=IIqkH7fFY3bWTLeztTG6lP1VBL_yNebxVqEQKh1NFhce4EErBOOV6kEWg53qo6At_KkjssPuSOeAw0iZNAGF9P8NvFgZBejiNwczhYiT_FROh7cabpyEmvO6Nez4.zL4&x-id=UploadPart
WARNING:huggingface_hub.utils._http:Retrying in 1s [Retry 1/5].
INFO:root:Epoch 10.0: Train Loss = 0.1206, Eval Loss = 0.47646617889404297
INFO:absl:Using default tokenizer.
INFO:root:Epoch 11.0: Train Loss = 0.1052, Eval Loss = 0.47586962580680847
INFO:absl:Using default tokenizer.
INFO:__main__:*** Evaluate ***
INFO:absl:Using default tokenizer.