File size: 5,997 Bytes
abb8a1f c035e58 1cb4ee3 420217d 2f5ea4e 56e9c92 8d4fecb d2b7db6 74a09a4 3805c5d 4807529 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
01/29/2024 19:52:08 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: True
01/29/2024 19:52:08 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=200,
evaluation_strategy=IntervalStrategy.STEPS,
fp16=True,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=True,
gradient_checkpointing_kwargs=None,
greater_is_better=None,
group_by_length=True,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=simpragma/breeze-listen-w2v2-ml,
hub_private_repo=False,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,
include_num_input_tokens_seen=False,
include_tokens_per_second=False,
jit_mode_eval=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=0.001,
length_column_name=input_length,
load_best_model_at_end=False,
local_rank=0,
log_level=passive,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-ml/runs/Jan29_19-52-08_knight,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_steps=500,
logging_strategy=IntervalStrategy.STEPS,
lr_scheduler_kwargs={},
lr_scheduler_type=SchedulerType.LINEAR,
max_grad_norm=1.0,
max_steps=-1,
metric_for_best_model=None,
mp_parameters=,
neftune_noise_alpha=None,
no_cuda=False,
num_train_epochs=4.0,
optim=OptimizerNames.ADAMW_BNB,
optim_args=None,
output_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-ml,
overwrite_output_dir=True,
past_index=-1,
per_device_eval_batch_size=8,
per_device_train_batch_size=4,
prediction_loss_only=False,
push_to_hub=True,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=True,
report_to=[],
resume_from_checkpoint=None,
run_name=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-ml,
save_on_each_node=False,
save_only_model=False,
save_safetensors=True,
save_steps=200,
save_strategy=IntervalStrategy.STEPS,
save_total_limit=3,
seed=42,
skip_memory_metrics=True,
split_batches=False,
tf32=None,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_mps_device=False,
warmup_ratio=0.0,
warmup_steps=100,
weight_decay=0.0,
)
{'eval_loss': 5.472805500030518, 'eval_wer': 1.075673807878369, 'eval_runtime': 162.595, 'eval_samples_per_second': 4.078, 'eval_steps_per_second': 0.51, 'epoch': 0.41}
{'eval_loss': 5.127437114715576, 'eval_wer': 1.003800967519005, 'eval_runtime': 163.1607, 'eval_samples_per_second': 4.063, 'eval_steps_per_second': 0.509, 'epoch': 0.81}
{'loss': 6.5037, 'learning_rate': 0.0007890792291220557, 'epoch': 1.02}
{'eval_loss': 0.6166694760322571, 'eval_wer': 0.8130615065653075, 'eval_runtime': 161.3235, 'eval_samples_per_second': 4.11, 'eval_steps_per_second': 0.514, 'epoch': 1.22}
{'eval_loss': 0.328411728143692, 'eval_wer': 0.582930200414651, 'eval_runtime': 162.053, 'eval_samples_per_second': 4.091, 'eval_steps_per_second': 0.512, 'epoch': 1.63}
{'loss': 1.0482, 'learning_rate': 0.0005214132762312634, 'epoch': 2.03}
{'eval_loss': 0.3169207274913788, 'eval_wer': 0.5666897028334485, 'eval_runtime': 165.1028, 'eval_samples_per_second': 4.016, 'eval_steps_per_second': 0.503, 'epoch': 2.03}
{'eval_loss': 0.28758111596107483, 'eval_wer': 0.5425017277125086, 'eval_runtime': 160.9496, 'eval_samples_per_second': 4.119, 'eval_steps_per_second': 0.516, 'epoch': 2.44}
{'eval_loss': 0.2846720516681671, 'eval_wer': 0.5521769177608846, 'eval_runtime': 161.8788, 'eval_samples_per_second': 4.096, 'eval_steps_per_second': 0.513, 'epoch': 2.85}
{'loss': 0.4314, 'learning_rate': 0.00025374732334047106, 'epoch': 3.05}
{'eval_loss': 0.27460750937461853, 'eval_wer': 0.5393918451969593, 'eval_runtime': 160.7333, 'eval_samples_per_second': 4.125, 'eval_steps_per_second': 0.516, 'epoch': 3.25}
{'eval_loss': 0.26981213688850403, 'eval_wer': 0.5345542501727713, 'eval_runtime': 160.1257, 'eval_samples_per_second': 4.14, 'eval_steps_per_second': 0.518, 'epoch': 3.66}
{'train_runtime': 5112.0325, 'train_samples_per_second': 1.54, 'train_steps_per_second': 0.385, 'train_loss': 2.1205503649827913, 'epoch': 4.0}
***** train metrics *****
epoch = 4.0
train_loss = 2.1206
train_runtime = 1:25:12.03
train_samples = 1968
train_samples_per_second = 1.54
train_steps_per_second = 0.385
01/29/2024 21:22:32 - INFO - __main__ - *** Evaluate ***
***** eval metrics *****
epoch = 4.0
eval_loss = 0.2666
eval_runtime = 0:02:40.65
eval_samples = 663
eval_samples_per_second = 4.127
eval_steps_per_second = 0.517
eval_wer = 0.5349
01/29/2024 21:25:13 - INFO - __main__ - Saving adapter weights under /cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-ml/adapter.mal.safetensors...
|