sanchit-gandhi HF staff commited on
Commit
7ea655a
1 Parent(s): 888cbcb

Training in progress, step 1000

Browse files
.gitattributes CHANGED
@@ -33,4 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- wandb/run-20240327_141031-aoxf8fxn/run-aoxf8fxn.wandb filter=lfs diff=lfs merge=lfs -text
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wandb/run-20240327_190418-lwtbcr8s/run-lwtbcr8s.wandb filter=lfs diff=lfs merge=lfs -text
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fadd9d0fda5b60e26d882d14f7f3b21183f6d681c79958372fb20f11f5e9c022
3
  size 3025686376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9462752283f48808e567cfa9b61a780fca95285fcf13c52272085cb1fce9a0b8
3
  size 3025686376
run.sh CHANGED
@@ -16,6 +16,7 @@ python run_speech_recognition_seq2seq.py \
16
  --eval_steps="1000" \
17
  --save_strategy="steps" \
18
  --save_steps="1000" \
 
19
  --generation_max_length="225" \
20
  --preprocessing_num_workers="1" \
21
  --dataloader_num_workers="4" \
 
16
  --eval_steps="1000" \
17
  --save_strategy="steps" \
18
  --save_steps="1000" \
19
+ --save_total_limit="1" \
20
  --generation_max_length="225" \
21
  --preprocessing_num_workers="1" \
22
  --dataloader_num_workers="4" \
runs/Mar27_19-04-06_hf-dgx-01/events.out.tfevents.1711562657.hf-dgx-01.1893386.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0a09f8a71f8134c693f79ce3983c1f5e7aeb209e0c4d1d3e01b33d4f51e8fd2
3
+ size 14123
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8fef62a116fd373aed4cbee49637759d2e8da863586704c7abd35aeea4042a0
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69039d984d087c8af1175b297d7c95f99b4285dbb1d726c14590ca1ccf214e41
3
  size 5048
wandb/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log CHANGED
@@ -1,28 +1,28 @@
1
- 2024-03-27 14:10:31,568 INFO MainThread:1482719 [wandb_setup.py:_flush():76] Current SDK version is 0.16.2
2
- 2024-03-27 14:10:31,568 INFO MainThread:1482719 [wandb_setup.py:_flush():76] Configure stats pid to 1482719
3
- 2024-03-27 14:10:31,568 INFO MainThread:1482719 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/.config/wandb/settings
4
- 2024-03-27 14:10:31,568 INFO MainThread:1482719 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/distil-large-v3-hi-ft/wandb/settings
5
- 2024-03-27 14:10:31,568 INFO MainThread:1482719 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
- 2024-03-27 14:10:31,568 INFO MainThread:1482719 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
- 2024-03-27 14:10:31,568 INFO MainThread:1482719 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program_abspath': '/home/sanchit/distil-large-v3-hi-ft/run_speech_recognition_seq2seq.py', 'program': 'run_speech_recognition_seq2seq.py'}
8
- 2024-03-27 14:10:31,568 INFO MainThread:1482719 [wandb_init.py:_log_setup():526] Logging user logs to /home/sanchit/distil-large-v3-hi-ft/wandb/run-20240327_141031-aoxf8fxn/logs/debug.log
9
- 2024-03-27 14:10:31,568 INFO MainThread:1482719 [wandb_init.py:_log_setup():527] Logging internal logs to /home/sanchit/distil-large-v3-hi-ft/wandb/run-20240327_141031-aoxf8fxn/logs/debug-internal.log
10
- 2024-03-27 14:10:31,569 INFO MainThread:1482719 [wandb_init.py:init():566] calling init triggers
11
- 2024-03-27 14:10:31,569 INFO MainThread:1482719 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
12
  config: {}
13
- 2024-03-27 14:10:31,569 INFO MainThread:1482719 [wandb_init.py:init():616] starting backend
14
- 2024-03-27 14:10:31,569 INFO MainThread:1482719 [wandb_init.py:init():620] setting up manager
15
- 2024-03-27 14:10:31,569 INFO MainThread:1482719 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
16
- 2024-03-27 14:10:31,570 INFO MainThread:1482719 [wandb_init.py:init():628] backend started and connected
17
- 2024-03-27 14:10:31,574 INFO MainThread:1482719 [wandb_init.py:init():720] updated telemetry
18
- 2024-03-27 14:10:31,631 INFO MainThread:1482719 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
19
- 2024-03-27 14:10:31,976 INFO MainThread:1482719 [wandb_run.py:_on_init():2254] communicating current version
20
- 2024-03-27 14:10:32,002 INFO MainThread:1482719 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.5 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
21
 
22
- 2024-03-27 14:10:32,002 INFO MainThread:1482719 [wandb_init.py:init():804] starting run threads in backend
23
- 2024-03-27 14:10:32,531 INFO MainThread:1482719 [wandb_run.py:_console_start():2233] atexit reg
24
- 2024-03-27 14:10:32,531 INFO MainThread:1482719 [wandb_run.py:_redirect():2088] redirect: wrap_raw
25
- 2024-03-27 14:10:32,531 INFO MainThread:1482719 [wandb_run.py:_redirect():2153] Wrapping output streams.
26
- 2024-03-27 14:10:32,531 INFO MainThread:1482719 [wandb_run.py:_redirect():2178] Redirects installed.
27
- 2024-03-27 14:10:32,532 INFO MainThread:1482719 [wandb_init.py:init():847] run started, returning control to user process
28
- 2024-03-27 14:10:32,534 INFO MainThread:1482719 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 2, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'distil-whisper/distil-large-v3', 'transformers_version': '4.40.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 5000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Mar27_14-10-21_hf-dgx-01', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 4, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
 
1
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Current SDK version is 0.16.2
2
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Configure stats pid to 1893386
3
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/.config/wandb/settings
4
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/distil-large-v3-hi-ft/wandb/settings
5
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program_abspath': '/home/sanchit/distil-large-v3-hi-ft/run_speech_recognition_seq2seq.py', 'program': 'run_speech_recognition_seq2seq.py'}
8
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:_log_setup():526] Logging user logs to /home/sanchit/distil-large-v3-hi-ft/wandb/run-20240327_190418-lwtbcr8s/logs/debug.log
9
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:_log_setup():527] Logging internal logs to /home/sanchit/distil-large-v3-hi-ft/wandb/run-20240327_190418-lwtbcr8s/logs/debug-internal.log
10
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:init():566] calling init triggers
11
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
12
  config: {}
13
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:init():616] starting backend
14
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:init():620] setting up manager
15
+ 2024-03-27 19:04:18,563 INFO MainThread:1893386 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
16
+ 2024-03-27 19:04:18,564 INFO MainThread:1893386 [wandb_init.py:init():628] backend started and connected
17
+ 2024-03-27 19:04:18,568 INFO MainThread:1893386 [wandb_init.py:init():720] updated telemetry
18
+ 2024-03-27 19:04:18,639 INFO MainThread:1893386 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
19
+ 2024-03-27 19:04:18,995 INFO MainThread:1893386 [wandb_run.py:_on_init():2254] communicating current version
20
+ 2024-03-27 19:04:19,034 INFO MainThread:1893386 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.5 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
21
 
22
+ 2024-03-27 19:04:19,034 INFO MainThread:1893386 [wandb_init.py:init():804] starting run threads in backend
23
+ 2024-03-27 19:04:19,118 INFO MainThread:1893386 [wandb_run.py:_console_start():2233] atexit reg
24
+ 2024-03-27 19:04:19,118 INFO MainThread:1893386 [wandb_run.py:_redirect():2088] redirect: wrap_raw
25
+ 2024-03-27 19:04:19,119 INFO MainThread:1893386 [wandb_run.py:_redirect():2153] Wrapping output streams.
26
+ 2024-03-27 19:04:19,119 INFO MainThread:1893386 [wandb_run.py:_redirect():2178] Redirects installed.
27
+ 2024-03-27 19:04:19,119 INFO MainThread:1893386 [wandb_init.py:init():847] run started, returning control to user process
28
+ 2024-03-27 19:04:19,121 INFO MainThread:1893386 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 2, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'distil-whisper/distil-large-v3', 'transformers_version': '4.40.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 5000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Mar27_19-04-06_hf-dgx-01', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 4, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
wandb/run-20240327_190418-lwtbcr8s/files/config.yaml ADDED
@@ -0,0 +1,751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.8.10
7
+ cli_version: 0.16.2
8
+ framework: huggingface
9
+ huggingface_version: 4.40.0.dev0
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: false
12
+ start_time: 1711562658.56459
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 2
17
+ - 3
18
+ - 5
19
+ - 11
20
+ - 12
21
+ - 49
22
+ - 51
23
+ - 53
24
+ - 55
25
+ - 71
26
+ - 98
27
+ - 100
28
+ 2:
29
+ - 1
30
+ - 2
31
+ - 3
32
+ - 5
33
+ - 11
34
+ - 12
35
+ - 49
36
+ - 51
37
+ - 53
38
+ - 55
39
+ - 71
40
+ - 98
41
+ - 100
42
+ 3:
43
+ - 7
44
+ - 23
45
+ 4: 3.8.10
46
+ 5: 0.16.2
47
+ 6: 4.40.0.dev0
48
+ 8:
49
+ - 5
50
+ 9:
51
+ 1: transformers_trainer
52
+ 13: linux-x86_64
53
+ m:
54
+ - 1: train/global_step
55
+ 6:
56
+ - 3
57
+ - 1: train/loss
58
+ 5: 1
59
+ 6:
60
+ - 1
61
+ - 1: train/grad_norm
62
+ 5: 1
63
+ 6:
64
+ - 1
65
+ - 1: train/learning_rate
66
+ 5: 1
67
+ 6:
68
+ - 1
69
+ - 1: train/epoch
70
+ 5: 1
71
+ 6:
72
+ - 1
73
+ - 1: eval/loss
74
+ 5: 1
75
+ 6:
76
+ - 1
77
+ - 1: eval/wer
78
+ 5: 1
79
+ 6:
80
+ - 1
81
+ - 1: eval/runtime
82
+ 5: 1
83
+ 6:
84
+ - 1
85
+ - 1: eval/samples_per_second
86
+ 5: 1
87
+ 6:
88
+ - 1
89
+ - 1: eval/steps_per_second
90
+ 5: 1
91
+ 6:
92
+ - 1
93
+ vocab_size:
94
+ desc: null
95
+ value: 51866
96
+ num_mel_bins:
97
+ desc: null
98
+ value: 128
99
+ d_model:
100
+ desc: null
101
+ value: 1280
102
+ encoder_layers:
103
+ desc: null
104
+ value: 32
105
+ encoder_attention_heads:
106
+ desc: null
107
+ value: 20
108
+ decoder_layers:
109
+ desc: null
110
+ value: 2
111
+ decoder_attention_heads:
112
+ desc: null
113
+ value: 20
114
+ decoder_ffn_dim:
115
+ desc: null
116
+ value: 5120
117
+ encoder_ffn_dim:
118
+ desc: null
119
+ value: 5120
120
+ dropout:
121
+ desc: null
122
+ value: 0.0
123
+ attention_dropout:
124
+ desc: null
125
+ value: 0.0
126
+ activation_dropout:
127
+ desc: null
128
+ value: 0.0
129
+ activation_function:
130
+ desc: null
131
+ value: gelu
132
+ init_std:
133
+ desc: null
134
+ value: 0.02
135
+ encoder_layerdrop:
136
+ desc: null
137
+ value: 0.0
138
+ decoder_layerdrop:
139
+ desc: null
140
+ value: 0.0
141
+ use_cache:
142
+ desc: null
143
+ value: true
144
+ num_hidden_layers:
145
+ desc: null
146
+ value: 32
147
+ scale_embedding:
148
+ desc: null
149
+ value: false
150
+ max_source_positions:
151
+ desc: null
152
+ value: 1500
153
+ max_target_positions:
154
+ desc: null
155
+ value: 448
156
+ classifier_proj_size:
157
+ desc: null
158
+ value: 256
159
+ use_weighted_layer_sum:
160
+ desc: null
161
+ value: false
162
+ apply_spec_augment:
163
+ desc: null
164
+ value: false
165
+ mask_time_prob:
166
+ desc: null
167
+ value: 0.05
168
+ mask_time_length:
169
+ desc: null
170
+ value: 10
171
+ mask_time_min_masks:
172
+ desc: null
173
+ value: 2
174
+ mask_feature_prob:
175
+ desc: null
176
+ value: 0.0
177
+ mask_feature_length:
178
+ desc: null
179
+ value: 10
180
+ mask_feature_min_masks:
181
+ desc: null
182
+ value: 0
183
+ median_filter_width:
184
+ desc: null
185
+ value: 7
186
+ return_dict:
187
+ desc: null
188
+ value: true
189
+ output_hidden_states:
190
+ desc: null
191
+ value: false
192
+ output_attentions:
193
+ desc: null
194
+ value: false
195
+ torchscript:
196
+ desc: null
197
+ value: false
198
+ torch_dtype:
199
+ desc: null
200
+ value: float16
201
+ use_bfloat16:
202
+ desc: null
203
+ value: false
204
+ tf_legacy_loss:
205
+ desc: null
206
+ value: false
207
+ pruned_heads:
208
+ desc: null
209
+ value: {}
210
+ tie_word_embeddings:
211
+ desc: null
212
+ value: true
213
+ chunk_size_feed_forward:
214
+ desc: null
215
+ value: 0
216
+ is_encoder_decoder:
217
+ desc: null
218
+ value: true
219
+ is_decoder:
220
+ desc: null
221
+ value: false
222
+ cross_attention_hidden_size:
223
+ desc: null
224
+ value: null
225
+ add_cross_attention:
226
+ desc: null
227
+ value: false
228
+ tie_encoder_decoder:
229
+ desc: null
230
+ value: false
231
+ max_length:
232
+ desc: null
233
+ value: 448
234
+ min_length:
235
+ desc: null
236
+ value: 0
237
+ do_sample:
238
+ desc: null
239
+ value: false
240
+ early_stopping:
241
+ desc: null
242
+ value: false
243
+ num_beams:
244
+ desc: null
245
+ value: 1
246
+ num_beam_groups:
247
+ desc: null
248
+ value: 1
249
+ diversity_penalty:
250
+ desc: null
251
+ value: 0.0
252
+ temperature:
253
+ desc: null
254
+ value: 1.0
255
+ top_k:
256
+ desc: null
257
+ value: 50
258
+ top_p:
259
+ desc: null
260
+ value: 1.0
261
+ typical_p:
262
+ desc: null
263
+ value: 1.0
264
+ repetition_penalty:
265
+ desc: null
266
+ value: 1.0
267
+ length_penalty:
268
+ desc: null
269
+ value: 1.0
270
+ no_repeat_ngram_size:
271
+ desc: null
272
+ value: 0
273
+ encoder_no_repeat_ngram_size:
274
+ desc: null
275
+ value: 0
276
+ bad_words_ids:
277
+ desc: null
278
+ value: null
279
+ num_return_sequences:
280
+ desc: null
281
+ value: 1
282
+ output_scores:
283
+ desc: null
284
+ value: false
285
+ return_dict_in_generate:
286
+ desc: null
287
+ value: false
288
+ forced_bos_token_id:
289
+ desc: null
290
+ value: null
291
+ forced_eos_token_id:
292
+ desc: null
293
+ value: null
294
+ remove_invalid_values:
295
+ desc: null
296
+ value: false
297
+ exponential_decay_length_penalty:
298
+ desc: null
299
+ value: null
300
+ suppress_tokens:
301
+ desc: null
302
+ value: null
303
+ begin_suppress_tokens:
304
+ desc: null
305
+ value:
306
+ - 220
307
+ - 50257
308
+ architectures:
309
+ desc: null
310
+ value:
311
+ - WhisperForConditionalGeneration
312
+ finetuning_task:
313
+ desc: null
314
+ value: null
315
+ id2label:
316
+ desc: null
317
+ value:
318
+ '0': LABEL_0
319
+ '1': LABEL_1
320
+ label2id:
321
+ desc: null
322
+ value:
323
+ LABEL_0: 0
324
+ LABEL_1: 1
325
+ tokenizer_class:
326
+ desc: null
327
+ value: null
328
+ prefix:
329
+ desc: null
330
+ value: null
331
+ bos_token_id:
332
+ desc: null
333
+ value: 50257
334
+ pad_token_id:
335
+ desc: null
336
+ value: 50256
337
+ eos_token_id:
338
+ desc: null
339
+ value: 50257
340
+ sep_token_id:
341
+ desc: null
342
+ value: null
343
+ decoder_start_token_id:
344
+ desc: null
345
+ value: 50258
346
+ task_specific_params:
347
+ desc: null
348
+ value: null
349
+ problem_type:
350
+ desc: null
351
+ value: null
352
+ _name_or_path:
353
+ desc: null
354
+ value: distil-whisper/distil-large-v3
355
+ transformers_version:
356
+ desc: null
357
+ value: 4.40.0.dev0
358
+ model_type:
359
+ desc: null
360
+ value: whisper
361
+ forced_decoder_ids:
362
+ desc: null
363
+ value: null
364
+ output_dir:
365
+ desc: null
366
+ value: ./
367
+ overwrite_output_dir:
368
+ desc: null
369
+ value: true
370
+ do_train:
371
+ desc: null
372
+ value: true
373
+ do_eval:
374
+ desc: null
375
+ value: true
376
+ do_predict:
377
+ desc: null
378
+ value: false
379
+ evaluation_strategy:
380
+ desc: null
381
+ value: steps
382
+ prediction_loss_only:
383
+ desc: null
384
+ value: false
385
+ per_device_train_batch_size:
386
+ desc: null
387
+ value: 32
388
+ per_device_eval_batch_size:
389
+ desc: null
390
+ value: 32
391
+ per_gpu_train_batch_size:
392
+ desc: null
393
+ value: null
394
+ per_gpu_eval_batch_size:
395
+ desc: null
396
+ value: null
397
+ gradient_accumulation_steps:
398
+ desc: null
399
+ value: 1
400
+ eval_accumulation_steps:
401
+ desc: null
402
+ value: null
403
+ eval_delay:
404
+ desc: null
405
+ value: 0
406
+ learning_rate:
407
+ desc: null
408
+ value: 0.0001
409
+ weight_decay:
410
+ desc: null
411
+ value: 0.0
412
+ adam_beta1:
413
+ desc: null
414
+ value: 0.9
415
+ adam_beta2:
416
+ desc: null
417
+ value: 0.999
418
+ adam_epsilon:
419
+ desc: null
420
+ value: 1.0e-08
421
+ max_grad_norm:
422
+ desc: null
423
+ value: 1.0
424
+ num_train_epochs:
425
+ desc: null
426
+ value: 3.0
427
+ max_steps:
428
+ desc: null
429
+ value: 5000
430
+ lr_scheduler_type:
431
+ desc: null
432
+ value: linear
433
+ lr_scheduler_kwargs:
434
+ desc: null
435
+ value: {}
436
+ warmup_ratio:
437
+ desc: null
438
+ value: 0.0
439
+ warmup_steps:
440
+ desc: null
441
+ value: 500
442
+ log_level:
443
+ desc: null
444
+ value: passive
445
+ log_level_replica:
446
+ desc: null
447
+ value: warning
448
+ log_on_each_node:
449
+ desc: null
450
+ value: true
451
+ logging_dir:
452
+ desc: null
453
+ value: ./runs/Mar27_19-04-06_hf-dgx-01
454
+ logging_strategy:
455
+ desc: null
456
+ value: steps
457
+ logging_first_step:
458
+ desc: null
459
+ value: false
460
+ logging_steps:
461
+ desc: null
462
+ value: 25
463
+ logging_nan_inf_filter:
464
+ desc: null
465
+ value: true
466
+ save_strategy:
467
+ desc: null
468
+ value: steps
469
+ save_steps:
470
+ desc: null
471
+ value: 1000
472
+ save_total_limit:
473
+ desc: null
474
+ value: 1
475
+ save_safetensors:
476
+ desc: null
477
+ value: true
478
+ save_on_each_node:
479
+ desc: null
480
+ value: false
481
+ save_only_model:
482
+ desc: null
483
+ value: false
484
+ no_cuda:
485
+ desc: null
486
+ value: false
487
+ use_cpu:
488
+ desc: null
489
+ value: false
490
+ use_mps_device:
491
+ desc: null
492
+ value: false
493
+ seed:
494
+ desc: null
495
+ value: 42
496
+ data_seed:
497
+ desc: null
498
+ value: null
499
+ jit_mode_eval:
500
+ desc: null
501
+ value: false
502
+ use_ipex:
503
+ desc: null
504
+ value: false
505
+ bf16:
506
+ desc: null
507
+ value: false
508
+ fp16:
509
+ desc: null
510
+ value: true
511
+ fp16_opt_level:
512
+ desc: null
513
+ value: O1
514
+ half_precision_backend:
515
+ desc: null
516
+ value: auto
517
+ bf16_full_eval:
518
+ desc: null
519
+ value: false
520
+ fp16_full_eval:
521
+ desc: null
522
+ value: false
523
+ tf32:
524
+ desc: null
525
+ value: null
526
+ local_rank:
527
+ desc: null
528
+ value: 0
529
+ ddp_backend:
530
+ desc: null
531
+ value: null
532
+ tpu_num_cores:
533
+ desc: null
534
+ value: null
535
+ tpu_metrics_debug:
536
+ desc: null
537
+ value: false
538
+ debug:
539
+ desc: null
540
+ value: []
541
+ dataloader_drop_last:
542
+ desc: null
543
+ value: false
544
+ eval_steps:
545
+ desc: null
546
+ value: 1000
547
+ dataloader_num_workers:
548
+ desc: null
549
+ value: 4
550
+ dataloader_prefetch_factor:
551
+ desc: null
552
+ value: null
553
+ past_index:
554
+ desc: null
555
+ value: -1
556
+ run_name:
557
+ desc: null
558
+ value: ./
559
+ disable_tqdm:
560
+ desc: null
561
+ value: false
562
+ remove_unused_columns:
563
+ desc: null
564
+ value: true
565
+ label_names:
566
+ desc: null
567
+ value: null
568
+ load_best_model_at_end:
569
+ desc: null
570
+ value: false
571
+ metric_for_best_model:
572
+ desc: null
573
+ value: null
574
+ greater_is_better:
575
+ desc: null
576
+ value: null
577
+ ignore_data_skip:
578
+ desc: null
579
+ value: false
580
+ fsdp:
581
+ desc: null
582
+ value: []
583
+ fsdp_min_num_params:
584
+ desc: null
585
+ value: 0
586
+ fsdp_config:
587
+ desc: null
588
+ value:
589
+ min_num_params: 0
590
+ xla: false
591
+ xla_fsdp_v2: false
592
+ xla_fsdp_grad_ckpt: false
593
+ fsdp_transformer_layer_cls_to_wrap:
594
+ desc: null
595
+ value: null
596
+ accelerator_config:
597
+ desc: null
598
+ value:
599
+ split_batches: false
600
+ dispatch_batches: null
601
+ even_batches: true
602
+ use_seedable_sampler: true
603
+ deepspeed:
604
+ desc: null
605
+ value: null
606
+ label_smoothing_factor:
607
+ desc: null
608
+ value: 0.0
609
+ optim:
610
+ desc: null
611
+ value: adamw_torch
612
+ optim_args:
613
+ desc: null
614
+ value: null
615
+ adafactor:
616
+ desc: null
617
+ value: false
618
+ group_by_length:
619
+ desc: null
620
+ value: false
621
+ length_column_name:
622
+ desc: null
623
+ value: input_length
624
+ report_to:
625
+ desc: null
626
+ value:
627
+ - tensorboard
628
+ - wandb
629
+ ddp_find_unused_parameters:
630
+ desc: null
631
+ value: null
632
+ ddp_bucket_cap_mb:
633
+ desc: null
634
+ value: null
635
+ ddp_broadcast_buffers:
636
+ desc: null
637
+ value: null
638
+ dataloader_pin_memory:
639
+ desc: null
640
+ value: true
641
+ dataloader_persistent_workers:
642
+ desc: null
643
+ value: false
644
+ skip_memory_metrics:
645
+ desc: null
646
+ value: true
647
+ use_legacy_prediction_loop:
648
+ desc: null
649
+ value: false
650
+ push_to_hub:
651
+ desc: null
652
+ value: true
653
+ resume_from_checkpoint:
654
+ desc: null
655
+ value: null
656
+ hub_model_id:
657
+ desc: null
658
+ value: null
659
+ hub_strategy:
660
+ desc: null
661
+ value: every_save
662
+ hub_token:
663
+ desc: null
664
+ value: <HUB_TOKEN>
665
+ hub_private_repo:
666
+ desc: null
667
+ value: false
668
+ hub_always_push:
669
+ desc: null
670
+ value: false
671
+ gradient_checkpointing:
672
+ desc: null
673
+ value: true
674
+ gradient_checkpointing_kwargs:
675
+ desc: null
676
+ value: null
677
+ include_inputs_for_metrics:
678
+ desc: null
679
+ value: false
680
+ fp16_backend:
681
+ desc: null
682
+ value: auto
683
+ push_to_hub_model_id:
684
+ desc: null
685
+ value: null
686
+ push_to_hub_organization:
687
+ desc: null
688
+ value: null
689
+ push_to_hub_token:
690
+ desc: null
691
+ value: <PUSH_TO_HUB_TOKEN>
692
+ mp_parameters:
693
+ desc: null
694
+ value: ''
695
+ auto_find_batch_size:
696
+ desc: null
697
+ value: false
698
+ full_determinism:
699
+ desc: null
700
+ value: false
701
+ torchdynamo:
702
+ desc: null
703
+ value: null
704
+ ray_scope:
705
+ desc: null
706
+ value: last
707
+ ddp_timeout:
708
+ desc: null
709
+ value: 1800
710
+ torch_compile:
711
+ desc: null
712
+ value: false
713
+ torch_compile_backend:
714
+ desc: null
715
+ value: null
716
+ torch_compile_mode:
717
+ desc: null
718
+ value: null
719
+ dispatch_batches:
720
+ desc: null
721
+ value: null
722
+ split_batches:
723
+ desc: null
724
+ value: null
725
+ include_tokens_per_second:
726
+ desc: null
727
+ value: false
728
+ include_num_input_tokens_seen:
729
+ desc: null
730
+ value: false
731
+ neftune_noise_alpha:
732
+ desc: null
733
+ value: null
734
+ optim_target_modules:
735
+ desc: null
736
+ value: null
737
+ sortish_sampler:
738
+ desc: null
739
+ value: false
740
+ predict_with_generate:
741
+ desc: null
742
+ value: true
743
+ generation_max_length:
744
+ desc: null
745
+ value: 225
746
+ generation_num_beams:
747
+ desc: null
748
+ value: null
749
+ generation_config:
750
+ desc: null
751
+ value: null
wandb/run-20240327_190418-lwtbcr8s/files/output.log ADDED
@@ -0,0 +1,1168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ 0%| | 0/5000 [00:00<?, ?it/s]/home/sanchit/hf/lib/python3.8/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
3
+ warnings.warn(
4
+ [WARNING|logging.py:329] 2024-03-27 19:04:32,560 >> `use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`...
5
+
6
+
7
+
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+ 0%|▍ | 25/5000 [01:42<7:34:43, 5.48s/it]
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+
54
+ 1%|▊ | 49/5000 [04:07<8:22:42, 6.09s/it]
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+ 1%|█▏ | 74/5000 [06:39<8:19:38, 6.09s/it]
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+ 2%|█▌ | 99/5000 [08:56<8:16:05, 6.07s/it]
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+ 2%|█▉ | 125/5000 [11:34<8:14:25, 6.09s/it]
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+ 3%|██▎ | 149/5000 [13:49<5:08:20, 3.81s/it]
159
+
160
+
161
+
162
+
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+
183
+
184
+ 3%|██▋ | 174/5000 [16:16<8:07:28, 6.06s/it]
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
210
+ 4%|███ | 199/5000 [18:48<8:06:00, 6.07s/it]
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+
227
+
228
+
229
+
230
+
231
+
232
+
233
+
234
+
235
+
236
+ 4%|███▍ | 224/5000 [21:12<9:32:38, 7.19s/it]
237
+
238
+
239
+
240
+
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
+
249
+
250
+
251
+
252
+
253
+
254
+
255
+
256
+
257
+
258
+
259
+
260
+
261
+
262
+ 5%|███▉ | 249/5000 [23:42<8:02:12, 6.09s/it]
263
+
264
+
265
+
266
+
267
+
268
+
269
+
270
+
271
+
272
+
273
+
274
+
275
+
276
+
277
+
278
+
279
+
280
+
281
+
282
+
283
+
284
+
285
+
286
+
287
+
288
+
289
+ 6%|████▎ | 275/5000 [26:19<7:56:48, 6.05s/it]
290
+
291
+
292
+
293
+
294
+
295
+
296
+
297
+
298
+
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+
307
+
308
+
309
+
310
+
311
+
312
+
313
+
314
+ 6%|████▋ | 299/5000 [28:30<7:54:23, 6.05s/it]
315
+
316
+
317
+
318
+
319
+
320
+
321
+
322
+
323
+
324
+
325
+
326
+
327
+
328
+
329
+
330
+
331
+
332
+
333
+
334
+
335
+
336
+
337
+
338
+
339
+
340
+ 6%|█████ | 324/5000 [30:40<6:38:21, 5.11s/it]
341
+
342
+
343
+
344
+
345
+
346
+
347
+
348
+
349
+
350
+
351
+
352
+
353
+
354
+
355
+
356
+
357
+
358
+
359
+
360
+
361
+
362
+
363
+
364
+
365
+
366
+ 7%|█████▍ | 349/5000 [32:57<7:12:34, 5.58s/it]
367
+
368
+
369
+
370
+
371
+
372
+
373
+
374
+
375
+
376
+
377
+
378
+
379
+
380
+
381
+
382
+
383
+
384
+
385
+
386
+
387
+
388
+
389
+
390
+
391
+
392
+ 7%|█████▊ | 374/5000 [35:15<7:05:59, 5.53s/it]
393
+
394
+
395
+
396
+
397
+
398
+
399
+
400
+
401
+
402
+
403
+
404
+
405
+
406
+
407
+
408
+
409
+
410
+
411
+
412
+
413
+
414
+
415
+
416
+
417
+
418
+ 8%|██████▏ | 399/5000 [37:31<6:58:32, 5.46s/it]
419
+
420
+
421
+
422
+
423
+
424
+
425
+
426
+
427
+
428
+
429
+
430
+
431
+
432
+
433
+
434
+
435
+
436
+
437
+
438
+
439
+
440
+
441
+
442
+
443
+
444
+ 8%|██████▌ | 424/5000 [39:49<6:53:43, 5.42s/it]
445
+
446
+
447
+
448
+
449
+
450
+
451
+
452
+
453
+
454
+
455
+
456
+
457
+
458
+
459
+
460
+
461
+
462
+
463
+
464
+
465
+
466
+
467
+
468
+
469
+
470
+ 9%|███████ | 449/5000 [42:16<7:51:23, 6.21s/it]
471
+
472
+
473
+
474
+
475
+
476
+
477
+
478
+
479
+
480
+
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+
489
+
490
+
491
+
492
+
493
+
494
+
495
+
496
+
497
+ 10%|███████▍ | 475/5000 [44:39<6:45:52, 5.38s/it]
498
+
499
+
500
+
501
+
502
+
503
+
504
+
505
+
506
+
507
+
508
+
509
+
510
+
511
+
512
+
513
+
514
+
515
+
516
+
517
+
518
+
519
+
520
+
521
+
522
+ 10%|███████▊ | 499/5000 [46:53<7:02:26, 5.63s/it]
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+
533
+
534
+
535
+
536
+
537
+
538
+
539
+
540
+
541
+
542
+
543
+
544
+
545
+
546
+
547
+
548
+ 10%|████████▏ | 524/5000 [49:11<6:54:40, 5.56s/it]
549
+
550
+
551
+
552
+
553
+
554
+
555
+
556
+
557
+
558
+
559
+
560
+
561
+
562
+
563
+
564
+
565
+
566
+
567
+
568
+
569
+
570
+
571
+
572
+
573
+
574
+ 11%|████████▌ | 549/5000 [51:29<6:56:18, 5.61s/it]
575
+
576
+
577
+
578
+
579
+
580
+
581
+
582
+
583
+
584
+
585
+
586
+
587
+
588
+
589
+
590
+
591
+
592
+
593
+
594
+
595
+
596
+
597
+
598
+
599
+
600
+
601
+ 12%|████████▉ | 575/5000 [53:37<7:16:11, 5.91s/it]
602
+
603
+
604
+
605
+
606
+
607
+
608
+
609
+
610
+
611
+
612
+
613
+
614
+
615
+
616
+
617
+
618
+
619
+
620
+
621
+
622
+
623
+
624
+
625
+
626
+
627
+ 12%|█████████▎ | 600/5000 [55:56<4:32:00, 3.71s/it]
628
+
629
+
630
+
631
+
632
+
633
+
634
+
635
+
636
+
637
+
638
+
639
+
640
+
641
+
642
+
643
+
644
+
645
+
646
+
647
+
648
+
649
+
650
+
651
+
652
+ 12%|█████████▋ | 624/5000 [58:19<7:22:51, 6.07s/it]
653
+
654
+
655
+
656
+
657
+
658
+
659
+
660
+
661
+
662
+
663
+
664
+
665
+
666
+
667
+
668
+
669
+
670
+
671
+
672
+
673
+
674
+
675
+
676
+
677
+
678
+ 13%|█████████▊ | 649/5000 [1:00:51<7:22:34, 6.10s/it]
679
+
680
+
681
+
682
+
683
+
684
+
685
+
686
+
687
+
688
+
689
+
690
+
691
+
692
+
693
+
694
+
695
+
696
+
697
+
698
+
699
+
700
+
701
+
702
+
703
+
704
+
705
+ 14%|██████████▎ | 675/5000 [1:03:27<7:24:53, 6.17s/it]
706
+
707
+
708
+
709
+
710
+
711
+
712
+
713
+
714
+
715
+
716
+
717
+
718
+
719
+
720
+
721
+
722
+
723
+
724
+
725
+
726
+
727
+
728
+
729
+
730
+
731
+ 14%|██████████▋ | 700/5000 [1:05:59<7:16:57, 6.10s/it]
732
+
733
+
734
+
735
+
736
+
737
+
738
+
739
+
740
+
741
+
742
+
743
+
744
+
745
+
746
+
747
+
748
+
749
+
750
+
751
+
752
+
753
+
754
+
755
+
756
+
757
+ 14%|███████████ | 724/5000 [1:08:26<7:15:14, 6.11s/it]
758
+
759
+
760
+
761
+
762
+
763
+
764
+
765
+
766
+
767
+
768
+
769
+
770
+
771
+
772
+
773
+
774
+
775
+
776
+
777
+
778
+
779
+
780
+
781
+
782
+
783
+
784
+ 15%|███████████▍ | 750/5000 [1:10:50<7:11:55, 6.10s/it]
785
+
786
+
787
+
788
+
789
+
790
+
791
+
792
+
793
+
794
+
795
+
796
+
797
+
798
+
799
+
800
+
801
+
802
+
803
+
804
+
805
+
806
+
807
+
808
+
809
+
810
+ 16%|███████████▊ | 775/5000 [1:13:23<7:10:59, 6.12s/it]
811
+
812
+
813
+
814
+
815
+
816
+
817
+
818
+
819
+
820
+
821
+
822
+
823
+
824
+
825
+
826
+
827
+
828
+
829
+
830
+
831
+
832
+
833
+
834
+
835
+ 16%|████████████▏ | 799/5000 [1:15:35<6:23:31, 5.48s/it]
836
+
837
+
838
+
839
+
840
+
841
+
842
+
843
+
844
+
845
+
846
+
847
+
848
+
849
+
850
+
851
+
852
+
853
+
854
+
855
+
856
+
857
+
858
+
859
+
860
+
861
+
862
+ 16%|████████████▌ | 825/5000 [1:18:13<7:05:11, 6.11s/it]
863
+
864
+
865
+
866
+
867
+
868
+
869
+
870
+
871
+
872
+
873
+
874
+
875
+
876
+
877
+
878
+
879
+
880
+
881
+
882
+
883
+
884
+
885
+
886
+
887
+ 17%|████████████▉ | 849/5000 [1:20:41<7:03:09, 6.12s/it]
888
+
889
+
890
+
891
+
892
+
893
+
894
+
895
+
896
+
897
+
898
+
899
+
900
+
901
+
902
+
903
+
904
+
905
+
906
+
907
+
908
+
909
+
910
+
911
+
912
+
913
+ 17%|█████████████▎ | 874/5000 [1:22:41<6:21:02, 5.54s/it]
914
+
915
+
916
+
917
+
918
+
919
+
920
+
921
+
922
+
923
+
924
+
925
+
926
+
927
+
928
+
929
+
930
+
931
+
932
+
933
+
934
+
935
+
936
+
937
+
938
+
939
+
940
+ 18%|█████████████▋ | 900/5000 [1:25:15<6:29:20, 5.70s/it]
941
+
942
+
943
+
944
+
945
+
946
+
947
+
948
+
949
+
950
+
951
+
952
+
953
+
954
+
955
+
956
+
957
+
958
+
959
+
960
+
961
+
962
+
963
+
964
+
965
+
966
+ 18%|██████████████ | 925/5000 [1:27:33<6:20:58, 5.61s/it]
967
+
968
+
969
+
970
+
971
+
972
+
973
+
974
+
975
+
976
+
977
+
978
+
979
+
980
+
981
+
982
+
983
+
984
+
985
+
986
+
987
+
988
+
989
+
990
+
991
+ 19%|██████████████▍ | 949/5000 [1:29:44<6:00:38, 5.34s/it]
992
+
993
+
994
+
995
+
996
+
997
+
998
+
999
+
1000
+
1001
+
1002
+
1003
+
1004
+
1005
+
1006
+
1007
+
1008
+
1009
+
1010
+
1011
+
1012
+
1013
+
1014
+
1015
+
1016
+
1017
+
1018
+ 20%|██████████████▊ | 975/5000 [1:32:06<5:56:54, 5.32s/it]
1019
+
1020
+
1021
+
1022
+
1023
+
1024
+
1025
+
1026
+
1027
+
1028
+
1029
+
1030
+
1031
+
1032
+
1033
+
1034
+
1035
+
1036
+
1037
+
1038
+
1039
+
1040
+
1041
+
1042
+
1043
+ 20%|███████████████ | 1000/5000 [1:34:23<6:09:03, 5.54s/it][INFO|trainer.py:768] 2024-03-27 20:38:42,868 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.
1044
+ [INFO|trainer.py:3544] 2024-03-27 20:38:42,871 >> ***** Running Evaluation *****
1045
+ [INFO|trainer.py:3546] 2024-03-27 20:38:42,871 >> Num examples = 3123
1046
+ [INFO|trainer.py:3549] 2024-03-27 20:38:42,871 >> Batch size = 32
1047
+ {'loss': 0.125, 'grad_norm': 1.3431593179702759, 'learning_rate': 8.900000000000001e-05, 'epoch': 4.5}
1048
+ [INFO|generation_whisper.py:1111] 2024-03-27 20:38:54,363 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1049
+ [INFO|generation_whisper.py:1111] 2024-03-27 20:39:08,530 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1050
+ 0%| | 0/98 [00:00<?, ?it/s][INFO|generation_whisper.py:1111] 2024-03-27 20:39:20,731 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1051
+ 2%|█▋ | 2/98 [00:12<09:45, 6.10s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:39:33,353 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1052
+ 3%|██▌ | 3/98 [00:24<13:57, 8.82s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:39:45,719 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1053
+ 4%|███▍ | 4/98 [00:37<15:53, 10.14s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:39:58,186 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1054
+ 5%|████▎ | 5/98 [00:49<16:58, 10.95s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:40:10,256 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1055
+ 6%|█████▏ | 6/98 [01:01<17:21, 11.32s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:40:22,756 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1056
+ 7%|██████ | 7/98 [01:14<17:44, 11.70s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:40:35,815 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1057
+ 8%|██████▊ | 8/98 [01:27<18:11, 12.13s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:40:48,410 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1058
+ 9%|███████▋ | 9/98 [01:39<18:12, 12.27s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:41:00,725 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1059
+ 10%|████████▍ | 10/98 [01:52<18:01, 12.29s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:41:13,064 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1060
+ 11%|█████████▎ | 11/98 [02:04<17:50, 12.30s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:41:25,221 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1061
+ 12%|██████████▏ | 12/98 [02:16<17:34, 12.26s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:41:37,320 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1062
+ 13%|███████████ | 13/98 [02:28<17:17, 12.21s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:41:49,529 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1063
+ 14%|███████████▊ | 14/98 [02:40<17:05, 12.21s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:42:02,322 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1064
+ 15%|████████████▋ | 15/98 [02:53<17:07, 12.39s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:42:14,865 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1065
+ 16%|█████████████▌ | 16/98 [03:06<16:59, 12.43s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:42:26,915 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1066
+ 17%|██████████████▍ | 17/98 [03:18<16:37, 12.32s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:42:39,306 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1067
+ 18%|███████████████▏ | 18/98 [03:30<16:27, 12.34s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:42:51,947 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1068
+ 19%|████████████████ | 19/98 [03:43<16:21, 12.43s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:43:04,435 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1069
+ 20%|████████████████▉ | 20/98 [03:55<16:10, 12.45s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:43:17,092 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1070
+ 21%|█████████████████▊ | 21/98 [04:08<16:03, 12.51s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:43:29,623 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1071
+ 22%|██████████████████▋ | 22/98 [04:21<15:51, 12.52s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:43:41,880 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1072
+ 23%|███████████████████▍ | 23/98 [04:33<15:32, 12.44s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:43:54,170 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1073
+ 24%|████████████████████▎ | 24/98 [04:45<15:17, 12.39s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:44:06,407 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1074
+ 26%|█████████████████████▏ | 25/98 [04:57<15:01, 12.35s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:44:18,895 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1075
+ 27%|██████████████████████ | 26/98 [05:10<14:52, 12.39s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:44:31,076 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1076
+ 28%|██████████████████████▊ | 27/98 [05:22<14:35, 12.33s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:44:43,369 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1077
+ 29%|███████████████████████▋ | 28/98 [05:34<14:22, 12.32s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:44:55,737 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1078
+ 30%|████████████████████████▌ | 29/98 [05:47<14:10, 12.33s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:45:08,272 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1079
+ 31%|█████████████████████████▍ | 30/98 [05:59<14:02, 12.39s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:45:20,453 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1080
+ 32%|██████████████████████████▎ | 31/98 [06:11<13:46, 12.33s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:45:32,520 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1081
+ 33%|███████████████████████████ | 32/98 [06:23<13:28, 12.25s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:45:44,789 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1082
+ 34%|███████████████████████████▉ | 33/98 [06:36<13:16, 12.26s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:45:56,701 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1083
+ 35%|████████████████████████████▊ | 34/98 [06:48<12:57, 12.15s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:46:09,446 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1084
+ 36%|█████████████████████████████▋ | 35/98 [07:00<12:56, 12.33s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:46:21,603 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1085
+ 37%|██████████████████████████████▍ | 36/98 [07:13<12:41, 12.28s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:46:34,264 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1086
+ 38%|███████████████████████████████▎ | 37/98 [07:25<12:35, 12.39s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:46:46,595 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1087
+ 39%|████████████████████████████████▏ | 38/98 [07:38<12:22, 12.37s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:46:59,053 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1088
+ 40%|█████████████████████████████████ | 39/98 [07:50<12:11, 12.40s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:47:10,902 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1089
+ 41%|█████████████████████████████████▉ | 40/98 [08:02<11:49, 12.24s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:47:20,081 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1090
+ 42%|██████████████████████████████████▋ | 41/98 [08:11<10:45, 11.32s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:47:25,687 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1091
+ 43%|███████████████████████████████████▌ | 42/98 [08:17<08:57, 9.60s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:47:31,276 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1092
+ 44%|████████████████████████████████████▍ | 43/98 [08:22<07:42, 8.40s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:47:36,866 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1093
+ 45%|█████████████████████████████████████▎ | 44/98 [08:28<06:48, 7.56s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:47:49,750 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1094
+ 46%|██████████████████████████████████████ | 45/98 [08:41<08:05, 9.15s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:47:59,779 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1095
+ 47%|██████████████████████████████████████▉ | 46/98 [08:51<08:09, 9.42s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:48:11,034 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1096
+ 48%|███████████████████████████████████████▊ | 47/98 [09:02<08:28, 9.97s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:48:19,962 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1097
+ 49%|████████████████████████████████████████▋ | 48/98 [09:11<08:02, 9.66s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:48:27,683 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1098
+ 50%|█████████████████████████████████████████▌ | 49/98 [09:19<07:24, 9.07s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:48:41,338 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1099
+ 51%|██████████████████████████████████████████▎ | 50/98 [09:32<08:21, 10.45s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:48:51,077 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1100
+ 52%|███████████████████████████████████████████▏ | 51/98 [09:42<08:01, 10.24s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:49:02,926 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1101
+ 53%|████████████████████████████████████████████ | 52/98 [09:54<08:13, 10.72s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:49:12,936 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1102
+ 54%|████████████████████████████████████████████▉ | 53/98 [10:04<07:52, 10.51s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:49:26,952 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1103
+ 55%|█████████████████████████████████████████████▋ | 54/98 [10:18<08:28, 11.56s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:49:35,958 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1104
+ 56%|██████████████████████████████████████████████▌ | 55/98 [10:27<07:44, 10.79s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:49:43,258 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1105
+ 57%|███████████████████████████████████████████████▍ | 56/98 [10:34<06:49, 9.75s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:49:57,125 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1106
+ 58%|████████████████████████████████████████████████▎ | 57/98 [10:48<07:30, 10.98s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:50:07,215 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1107
+ 59%|█████████████████████████████████████████████████ | 58/98 [10:58<07:08, 10.72s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:50:20,553 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1108
+ 60%|█████████████████████████████████████████████████▉ | 59/98 [11:12<07:28, 11.50s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:50:29,864 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1109
+ 61%|██████████████████████████████████████████████████▊ | 60/98 [11:21<06:52, 10.85s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:50:42,112 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1110
+ 62%|███████████████████████████████████████████████████▋ | 61/98 [11:33<06:56, 11.26s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:50:55,720 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1111
+ 63%|████████████████████████████████████████████████████▌ | 62/98 [11:47<07:10, 11.97s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:51:04,551 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1112
+ 64%|█████████████████████████████████████████████████████▎ | 63/98 [11:56<06:25, 11.03s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:51:17,578 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1113
+ 65%|██████████████████████████████████████████████████████▏ | 64/98 [12:09<06:35, 11.63s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:51:26,598 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1114
+ 66%|███████████████████████████████████████████████████████ | 65/98 [12:18<05:57, 10.85s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:51:39,417 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1115
+ 67%|███████████████████████████████████████████████████████▉ | 66/98 [12:30<06:05, 11.44s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:51:48,597 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1116
+ 68%|████████████████████████████████████████████████████████▋ | 67/98 [12:40<05:33, 10.76s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:52:01,301 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1117
+ 69%|█████████████████████████████████████████████████████████▌ | 68/98 [12:52<05:40, 11.34s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:52:10,667 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1118
+ 70%|██████████████████████████████████████████████████████████▍ | 69/98 [13:02<05:11, 10.75s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:52:23,355 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1119
+ 71%|███████████████████████████████████████████████████████████▎ | 70/98 [13:14<05:17, 11.33s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:52:32,654 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1120
+ 72%|████████████████████████████████████████████████████████████▏ | 71/98 [13:24<04:49, 10.72s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:52:44,957 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1121
+ 73%|████████████████████████████████████████████████████████████▉ | 72/98 [13:36<04:51, 11.20s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:52:54,552 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1122
+ 74%|█████████████████████████████████████████████████████████████▊ | 73/98 [13:46<04:27, 10.72s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:53:04,284 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1123
+ 76%|██████████████████████████████████████████████████████████████▋ | 74/98 [13:55<04:10, 10.42s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:53:14,743 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1124
+ 77%|███████████████████████████████████████████████████████████████▌ | 75/98 [14:06<03:59, 10.43s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:53:27,643 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1125
+ 78%|████████████████████████████████████████████████████████████████▎ | 76/98 [14:19<04:05, 11.17s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:53:37,693 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1126
+ 79%|█████████████████████████████████████████████████████████████████▏ | 77/98 [14:29<03:47, 10.84s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:53:51,003 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1127
+ 80%|██████████████████████████████████████████████████████████████████ | 78/98 [14:42<03:51, 11.58s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:54:04,626 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1128
+ 81%|██████████████████████████████████████████████████████████████████▉ | 79/98 [14:56<03:51, 12.19s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:54:16,550 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1129
+ 82%|███████████████████████████████████████████████████████████████████▊ | 80/98 [15:08<03:38, 12.11s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:54:22,154 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1130
+ 83%|████████████████████████████████████████████████████████████████████▌ | 81/98 [15:13<02:52, 10.16s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:54:29,173 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1131
+ 84%|█████████████████████████████████████████████████████████████████████▍ | 82/98 [15:20<02:27, 9.22s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:54:41,896 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1132
+ 85%|████████████████████████████████���█████████████████████████████████████▎ | 83/98 [15:33<02:34, 10.27s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:54:51,984 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1133
+ 86%|███████████████████████████████████████████████████████████████████████▏ | 84/98 [15:43<02:23, 10.21s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:55:05,787 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1134
+ 87%|███████████████████████████████████████████████████████████████████████▉ | 85/98 [15:57<02:26, 11.29s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:55:18,825 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1135
+ 88%|████████████████████████████████████████████████████████████████████████▊ | 86/98 [16:10<02:21, 11.81s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:55:28,223 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1136
+ 89%|█████████████████████████████████████████████████████████████████████████▋ | 87/98 [16:19<02:02, 11.09s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:55:40,560 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1137
+ 90%|██████████████████████████████████████████████████████████████████████████▌ | 88/98 [16:32<01:54, 11.46s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:55:50,658 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1138
+ 91%|███████████████████████████████████████████████████████████████████████████▍ | 89/98 [16:42<01:39, 11.05s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:56:04,429 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1139
+ 92%|████████████████████████████████████████████████████████████████████████████▏ | 90/98 [16:55<01:34, 11.87s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:56:15,135 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1140
+ 93%|█████████████████████████████████████████████████████████████████████████████ | 91/98 [17:06<01:20, 11.52s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:56:28,795 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1141
+ 95%|██████████████████████████████████████████████████████████████████████████████▊ | 93/98 [17:30<00:57, 11.57s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:56:38,973 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1142
+ 96%|███████████████████████████████████████████████████████████████████████████████▌ | 94/98 [17:44<00:48, 12.17s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:56:52,561 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1143
+ 97%|████████████████████████████████████████████████████████████████████████████████▍ | 95/98 [17:54<00:34, 11.54s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:57:02,626 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1144
+ 98%|█████████████████████████████████████████████████████████████████████████████████▎ | 96/98 [18:07<00:24, 12.23s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:57:16,451 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1145
+ 99%|██████████████████████████████████████████████████████████████████████████████████▏| 97/98 [18:18<00:11, 11.61s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:57:26,758 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1146
+ 99%|██████████████████████████████████████████████████████████████████████████████████▏| 97/98 [18:18<00:11, 11.61s/it][INFO|generation_whisper.py:1111] 2024-03-27 20:57:26,758 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
1147
+ [WARNING|configuration_utils.py:447] 2024-03-27 20:57:36,255 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.of task=transcribe.
1148
+ [WARNING|configuration_utils.py:447] 2024-03-27 20:57:36,255 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.of task=transcribe.
1149
+ Non-default generation parameters: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}
1150
+ [INFO|configuration_utils.py:471] 2024-03-27 20:57:36,256 >> Configuration saved in ./checkpoint-1000/config.json
1151
+ [INFO|configuration_utils.py:697] 2024-03-27 20:57:36,257 >> Configuration saved in ./checkpoint-1000/generation_config.json
1152
+ [INFO|modeling_utils.py:2475] 2024-03-27 20:57:44,189 >> Model weights saved in ./checkpoint-1000/model.safetensors
1153
+ [INFO|feature_extraction_utils.py:424] 2024-03-27 20:57:44,190 >> Feature extractor saved in ./checkpoint-1000/preprocessor_config.json
1154
+ [INFO|feature_extraction_utils.py:424] 2024-03-27 20:58:06,811 >> Feature extractor saved in ./preprocessor_config.json
1155
+ /home/sanchit/hf/lib/python3.8/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
1156
+ warnings.warn(
1157
+
1158
+
1159
+
1160
+
1161
+
1162
+
1163
+
1164
+
1165
+
1166
+
1167
+
1168
+
wandb/run-20240327_190418-lwtbcr8s/files/requirements.txt ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ accelerate==0.27.2
3
+ aiohttp==3.9.3
4
+ aiosignal==1.3.1
5
+ anyio==4.2.0
6
+ appdirs==1.4.4
7
+ argon2-cffi-bindings==21.2.0
8
+ argon2-cffi==23.1.0
9
+ arrow==1.3.0
10
+ asttokens==2.4.1
11
+ astunparse==1.6.3
12
+ async-lru==2.0.4
13
+ async-timeout==4.0.3
14
+ attrs==23.2.0
15
+ audioread==3.0.1
16
+ av==11.0.0
17
+ babel==2.14.0
18
+ backcall==0.2.0
19
+ beautifulsoup4==4.12.3
20
+ bitsandbytes==0.42.0
21
+ bleach==6.1.0
22
+ cached-property==1.5.2
23
+ cachetools==5.3.2
24
+ certifi==2024.2.2
25
+ cffi==1.16.0
26
+ charset-normalizer==3.3.2
27
+ chex==0.1.7
28
+ click==8.1.7
29
+ coloredlogs==15.0.1
30
+ comm==0.2.1
31
+ contourpy==1.1.1
32
+ ctranslate2==4.1.0
33
+ cycler==0.12.1
34
+ datasets==2.18.0
35
+ debugpy==1.8.0
36
+ decorator==5.1.1
37
+ defusedxml==0.7.1
38
+ dill==0.3.7
39
+ dm-tree==0.1.8
40
+ docker-pycreds==0.4.0
41
+ docstring-parser==0.15
42
+ einops==0.7.0
43
+ etils==1.3.0
44
+ evaluate==0.4.1
45
+ exceptiongroup==1.2.0
46
+ executing==2.0.1
47
+ fastjsonschema==2.19.1
48
+ filelock==3.13.1
49
+ flash-attn==2.5.3
50
+ flatbuffers==23.5.26
51
+ flax==0.7.2
52
+ fonttools==4.48.1
53
+ fqdn==1.5.1
54
+ frozenlist==1.4.1
55
+ fsspec==2024.2.0
56
+ gast==0.4.0
57
+ gitdb==4.0.11
58
+ gitpython==3.1.41
59
+ google-auth-oauthlib==1.0.0
60
+ google-auth==2.27.0
61
+ google-pasta==0.2.0
62
+ grpcio==1.60.1
63
+ h11==0.14.0
64
+ h5py==3.10.0
65
+ httpcore==1.0.2
66
+ httpx==0.26.0
67
+ huggingface-hub==0.21.4
68
+ humanfriendly==10.0
69
+ idna==3.6
70
+ importlib-metadata==7.0.1
71
+ importlib-resources==6.1.1
72
+ iniconfig==2.0.0
73
+ ipdb==0.13.13
74
+ ipykernel==6.29.2
75
+ ipython==8.12.3
76
+ isoduration==20.11.0
77
+ jax==0.4.13
78
+ jaxlib==0.4.13
79
+ jedi==0.19.1
80
+ jinja2==3.1.2
81
+ jiwer==3.0.3
82
+ joblib==1.3.2
83
+ json5==0.9.14
84
+ jsonpointer==2.4
85
+ jsonschema-specifications==2023.12.1
86
+ jsonschema==4.21.1
87
+ jupyter-client==8.6.0
88
+ jupyter-core==5.7.1
89
+ jupyter-events==0.9.0
90
+ jupyter-lsp==2.2.2
91
+ jupyter-server-terminals==0.5.2
92
+ jupyter-server==2.12.5
93
+ jupyterlab-pygments==0.3.0
94
+ jupyterlab-server==2.25.2
95
+ jupyterlab==4.1.0
96
+ keras==2.13.1
97
+ kiwisolver==1.4.5
98
+ lazy-loader==0.3
99
+ libclang==16.0.6
100
+ librosa==0.10.1
101
+ llvmlite==0.41.1
102
+ markdown-it-py==3.0.0
103
+ markdown==3.5.2
104
+ markupsafe==2.1.3
105
+ matplotlib-inline==0.1.6
106
+ matplotlib==3.7.4
107
+ mdurl==0.1.2
108
+ mistune==3.0.2
109
+ ml-dtypes==0.2.0
110
+ more-itertools==10.2.0
111
+ mpmath==1.2.1
112
+ msclap==1.3.3
113
+ msgpack==1.0.7
114
+ multidict==6.0.5
115
+ multiprocess==0.70.15
116
+ nbclient==0.9.0
117
+ nbconvert==7.16.0
118
+ nbformat==5.9.2
119
+ nest-asyncio==1.6.0
120
+ networkx==3.0rc1
121
+ ninja==1.11.1.1
122
+ notebook-shim==0.2.3
123
+ numba==0.58.1
124
+ numpy==1.24.3
125
+ nvidia-cublas-cu12==12.1.3.1
126
+ nvidia-cuda-cupti-cu12==12.1.105
127
+ nvidia-cuda-nvrtc-cu12==12.1.105
128
+ nvidia-cuda-runtime-cu12==12.1.105
129
+ nvidia-cudnn-cu12==8.9.2.26
130
+ nvidia-cufft-cu12==11.0.2.54
131
+ nvidia-curand-cu12==10.3.2.106
132
+ nvidia-cusolver-cu12==11.4.5.107
133
+ nvidia-cusparse-cu12==12.1.0.106
134
+ nvidia-nccl-cu12==2.19.3
135
+ nvidia-nvjitlink-cu12==12.1.105
136
+ nvidia-nvtx-cu12==12.1.105
137
+ oauthlib==3.2.2
138
+ onnxruntime==1.17.1
139
+ openai-whisper==20231117
140
+ opt-einsum==3.3.0
141
+ optax==0.1.8
142
+ orbax-checkpoint==0.2.3
143
+ overrides==7.7.0
144
+ packaging==23.2
145
+ pandas==2.0.3
146
+ pandocfilters==1.5.1
147
+ parameterized==0.9.0
148
+ parso==0.8.3
149
+ peft==0.8.2
150
+ pexpect==4.9.0
151
+ pickleshare==0.7.5
152
+ pillow==9.3.0
153
+ pip==24.0
154
+ pkg-resources==0.0.0
155
+ pkgutil-resolve-name==1.3.10
156
+ platformdirs==4.2.0
157
+ pluggy==1.4.0
158
+ pooch==1.8.0
159
+ prometheus-client==0.19.0
160
+ prompt-toolkit==3.0.43
161
+ protobuf==4.25.2
162
+ psutil==5.9.8
163
+ ptyprocess==0.7.0
164
+ pure-eval==0.2.2
165
+ pyarrow-hotfix==0.6
166
+ pyarrow==15.0.0
167
+ pyasn1-modules==0.3.0
168
+ pyasn1==0.5.1
169
+ pycparser==2.21
170
+ pygments==2.17.2
171
+ pyparsing==3.1.1
172
+ pytest==7.4.4
173
+ python-dateutil==2.8.2
174
+ python-json-logger==2.0.7
175
+ pytorch-triton==3.0.0+901819d2b6
176
+ pytz==2024.1
177
+ pyyaml==6.0.1
178
+ pyzmq==25.1.2
179
+ rapidfuzz==3.6.1
180
+ referencing==0.33.0
181
+ regex==2023.12.25
182
+ requests-oauthlib==1.3.1
183
+ requests==2.31.0
184
+ responses==0.18.0
185
+ rfc3339-validator==0.1.4
186
+ rfc3986-validator==0.1.1
187
+ rich==13.7.0
188
+ rpds-py==0.17.1
189
+ rsa==4.9
190
+ safetensors==0.4.2
191
+ scikit-learn==1.3.2
192
+ scipy==1.10.1
193
+ send2trash==1.8.2
194
+ sentry-sdk==1.40.0
195
+ setproctitle==1.3.3
196
+ setuptools==44.0.0
197
+ shtab==1.7.0
198
+ six==1.16.0
199
+ smmap==5.0.1
200
+ sniffio==1.3.0
201
+ soundfile==0.12.1
202
+ soupsieve==2.5
203
+ soxr==0.3.7
204
+ stack-data==0.6.3
205
+ sympy==1.11.1
206
+ tensorboard-data-server==0.7.2
207
+ tensorboard==2.13.0
208
+ tensorflow-cpu==2.13.1
209
+ tensorflow-estimator==2.13.0
210
+ tensorflow-io-gcs-filesystem==0.34.0
211
+ tensorstore==0.1.45
212
+ termcolor==2.4.0
213
+ terminado==0.18.0
214
+ threadpoolctl==3.2.0
215
+ tiktoken==0.6.0
216
+ tinycss2==1.2.1
217
+ tokenizers==0.15.1
218
+ tomli==2.0.1
219
+ toolz==0.12.1
220
+ torch==2.2.1
221
+ torchaudio==2.2.1
222
+ torchlibrosa==0.1.0
223
+ torchvision==0.17.1
224
+ tornado==6.4
225
+ tqdm==4.66.1
226
+ traitlets==5.14.1
227
+ transformers==4.39.0.dev0
228
+ triton==2.2.0
229
+ trl==0.7.11
230
+ types-python-dateutil==2.8.19.20240106
231
+ typing-extensions==4.9.0
232
+ tyro==0.7.3
233
+ tzdata==2023.4
234
+ uri-template==1.3.0
235
+ urllib3==2.2.0
236
+ wandb==0.16.2
237
+ wcwidth==0.2.13
238
+ webcolors==1.13
239
+ webencodings==0.5.1
240
+ websocket-client==1.7.0
241
+ werkzeug==3.0.1
242
+ wheel==0.42.0
243
+ wrapt==1.16.0
244
+ xxhash==3.4.1
245
+ yarl==1.9.4
246
+ zipp==3.17.0
wandb/run-20240327_190418-lwtbcr8s/files/wandb-metadata.json ADDED
@@ -0,0 +1,738 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-166-generic-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2024-03-27T18:04:19.090782",
5
+ "startedAt": "2024-03-27T18:04:18.560848",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [
9
+ "--model_name_or_path=distil-whisper/distil-large-v3",
10
+ "--dataset_name=mozilla-foundation/common_voice_16_1",
11
+ "--dataset_config_name=hi",
12
+ "--language=hindi",
13
+ "--train_split_name=train+validation",
14
+ "--eval_split_name=test",
15
+ "--max_steps=5000",
16
+ "--output_dir=./",
17
+ "--per_device_train_batch_size=32",
18
+ "--per_device_eval_batch_size=32",
19
+ "--logging_steps=25",
20
+ "--learning_rate=1e-4",
21
+ "--warmup_steps=500",
22
+ "--evaluation_strategy=steps",
23
+ "--eval_steps=1000",
24
+ "--save_strategy=steps",
25
+ "--save_steps=1000",
26
+ "--save_total_limit=1",
27
+ "--generation_max_length=225",
28
+ "--preprocessing_num_workers=1",
29
+ "--dataloader_num_workers=4",
30
+ "--length_column_name=input_length",
31
+ "--max_duration_in_seconds=30",
32
+ "--text_column_name=sentence",
33
+ "--freeze_feature_encoder=False",
34
+ "--gradient_checkpointing",
35
+ "--fp16",
36
+ "--overwrite_output_dir",
37
+ "--do_train",
38
+ "--do_eval",
39
+ "--predict_with_generate",
40
+ "--use_auth_token",
41
+ "--push_to_hub"
42
+ ],
43
+ "state": "running",
44
+ "program": "run_speech_recognition_seq2seq.py",
45
+ "codePathLocal": "run_speech_recognition_seq2seq.py",
46
+ "codePath": "run_speech_recognition_seq2seq.py",
47
+ "git": {
48
+ "remote": "https://huggingface.co/sanchit-gandhi/distil-large-v3-hi-ft",
49
+ "commit": "40c686df113c0e98e7363c1bd523f58d11848fc0"
50
+ },
51
+ "email": "sanchit@huggingface.co",
52
+ "root": "/home/sanchit/distil-large-v3-hi-ft",
53
+ "host": "hf-dgx-01",
54
+ "username": "sanchit",
55
+ "executable": "/home/sanchit/hf/bin/python",
56
+ "cpu_count": 64,
57
+ "cpu_count_logical": 128,
58
+ "cpu_freq": {
59
+ "current": 2290.4996484375006,
60
+ "min": 1500.0,
61
+ "max": 2250.0
62
+ },
63
+ "cpu_freq_per_core": [
64
+ {
65
+ "current": 2186.729,
66
+ "min": 1500.0,
67
+ "max": 2250.0
68
+ },
69
+ {
70
+ "current": 1795.542,
71
+ "min": 1500.0,
72
+ "max": 2250.0
73
+ },
74
+ {
75
+ "current": 1792.045,
76
+ "min": 1500.0,
77
+ "max": 2250.0
78
+ },
79
+ {
80
+ "current": 1791.339,
81
+ "min": 1500.0,
82
+ "max": 2250.0
83
+ },
84
+ {
85
+ "current": 1791.496,
86
+ "min": 1500.0,
87
+ "max": 2250.0
88
+ },
89
+ {
90
+ "current": 1742.02,
91
+ "min": 1500.0,
92
+ "max": 2250.0
93
+ },
94
+ {
95
+ "current": 1692.095,
96
+ "min": 1500.0,
97
+ "max": 2250.0
98
+ },
99
+ {
100
+ "current": 1658.911,
101
+ "min": 1500.0,
102
+ "max": 2250.0
103
+ },
104
+ {
105
+ "current": 1606.212,
106
+ "min": 1500.0,
107
+ "max": 2250.0
108
+ },
109
+ {
110
+ "current": 1606.808,
111
+ "min": 1500.0,
112
+ "max": 2250.0
113
+ },
114
+ {
115
+ "current": 1606.448,
116
+ "min": 1500.0,
117
+ "max": 2250.0
118
+ },
119
+ {
120
+ "current": 2094.446,
121
+ "min": 1500.0,
122
+ "max": 2250.0
123
+ },
124
+ {
125
+ "current": 1734.944,
126
+ "min": 1500.0,
127
+ "max": 2250.0
128
+ },
129
+ {
130
+ "current": 1736.215,
131
+ "min": 1500.0,
132
+ "max": 2250.0
133
+ },
134
+ {
135
+ "current": 2489.415,
136
+ "min": 1500.0,
137
+ "max": 2250.0
138
+ },
139
+ {
140
+ "current": 1737.217,
141
+ "min": 1500.0,
142
+ "max": 2250.0
143
+ },
144
+ {
145
+ "current": 1695.232,
146
+ "min": 1500.0,
147
+ "max": 2250.0
148
+ },
149
+ {
150
+ "current": 2416.542,
151
+ "min": 1500.0,
152
+ "max": 2250.0
153
+ },
154
+ {
155
+ "current": 1737.614,
156
+ "min": 1500.0,
157
+ "max": 2250.0
158
+ },
159
+ {
160
+ "current": 1599.452,
161
+ "min": 1500.0,
162
+ "max": 2250.0
163
+ },
164
+ {
165
+ "current": 1772.758,
166
+ "min": 1500.0,
167
+ "max": 2250.0
168
+ },
169
+ {
170
+ "current": 1738.087,
171
+ "min": 1500.0,
172
+ "max": 2250.0
173
+ },
174
+ {
175
+ "current": 2311.378,
176
+ "min": 1500.0,
177
+ "max": 2250.0
178
+ },
179
+ {
180
+ "current": 1735.599,
181
+ "min": 1500.0,
182
+ "max": 2250.0
183
+ },
184
+ {
185
+ "current": 1620.967,
186
+ "min": 1500.0,
187
+ "max": 2250.0
188
+ },
189
+ {
190
+ "current": 1864.863,
191
+ "min": 1500.0,
192
+ "max": 2250.0
193
+ },
194
+ {
195
+ "current": 1729.177,
196
+ "min": 1500.0,
197
+ "max": 2250.0
198
+ },
199
+ {
200
+ "current": 1728.804,
201
+ "min": 1500.0,
202
+ "max": 2250.0
203
+ },
204
+ {
205
+ "current": 1788.133,
206
+ "min": 1500.0,
207
+ "max": 2250.0
208
+ },
209
+ {
210
+ "current": 1795.255,
211
+ "min": 1500.0,
212
+ "max": 2250.0
213
+ },
214
+ {
215
+ "current": 1794.306,
216
+ "min": 1500.0,
217
+ "max": 2250.0
218
+ },
219
+ {
220
+ "current": 1794.529,
221
+ "min": 1500.0,
222
+ "max": 2250.0
223
+ },
224
+ {
225
+ "current": 1733.742,
226
+ "min": 1500.0,
227
+ "max": 2250.0
228
+ },
229
+ {
230
+ "current": 2473.813,
231
+ "min": 1500.0,
232
+ "max": 2250.0
233
+ },
234
+ {
235
+ "current": 1932.309,
236
+ "min": 1500.0,
237
+ "max": 2250.0
238
+ },
239
+ {
240
+ "current": 1727.805,
241
+ "min": 1500.0,
242
+ "max": 2250.0
243
+ },
244
+ {
245
+ "current": 1730.935,
246
+ "min": 1500.0,
247
+ "max": 2250.0
248
+ },
249
+ {
250
+ "current": 1729.65,
251
+ "min": 1500.0,
252
+ "max": 2250.0
253
+ },
254
+ {
255
+ "current": 1730.989,
256
+ "min": 1500.0,
257
+ "max": 2250.0
258
+ },
259
+ {
260
+ "current": 2002.554,
261
+ "min": 1500.0,
262
+ "max": 2250.0
263
+ },
264
+ {
265
+ "current": 2089.927,
266
+ "min": 1500.0,
267
+ "max": 2250.0
268
+ },
269
+ {
270
+ "current": 1727.843,
271
+ "min": 1500.0,
272
+ "max": 2250.0
273
+ },
274
+ {
275
+ "current": 1728.434,
276
+ "min": 1500.0,
277
+ "max": 2250.0
278
+ },
279
+ {
280
+ "current": 2271.01,
281
+ "min": 1500.0,
282
+ "max": 2250.0
283
+ },
284
+ {
285
+ "current": 3370.134,
286
+ "min": 1500.0,
287
+ "max": 2250.0
288
+ },
289
+ {
290
+ "current": 1687.104,
291
+ "min": 1500.0,
292
+ "max": 2250.0
293
+ },
294
+ {
295
+ "current": 1688.345,
296
+ "min": 1500.0,
297
+ "max": 2250.0
298
+ },
299
+ {
300
+ "current": 1694.029,
301
+ "min": 1500.0,
302
+ "max": 2250.0
303
+ },
304
+ {
305
+ "current": 3295.66,
306
+ "min": 1500.0,
307
+ "max": 2250.0
308
+ },
309
+ {
310
+ "current": 1684.621,
311
+ "min": 1500.0,
312
+ "max": 2250.0
313
+ },
314
+ {
315
+ "current": 1686.668,
316
+ "min": 1500.0,
317
+ "max": 2250.0
318
+ },
319
+ {
320
+ "current": 1687.585,
321
+ "min": 1500.0,
322
+ "max": 2250.0
323
+ },
324
+ {
325
+ "current": 1727.907,
326
+ "min": 1500.0,
327
+ "max": 2250.0
328
+ },
329
+ {
330
+ "current": 1728.023,
331
+ "min": 1500.0,
332
+ "max": 2250.0
333
+ },
334
+ {
335
+ "current": 1727.835,
336
+ "min": 1500.0,
337
+ "max": 2250.0
338
+ },
339
+ {
340
+ "current": 2090.043,
341
+ "min": 1500.0,
342
+ "max": 2250.0
343
+ },
344
+ {
345
+ "current": 2245.903,
346
+ "min": 1500.0,
347
+ "max": 2250.0
348
+ },
349
+ {
350
+ "current": 2202.881,
351
+ "min": 1500.0,
352
+ "max": 2250.0
353
+ },
354
+ {
355
+ "current": 2284.838,
356
+ "min": 1500.0,
357
+ "max": 2250.0
358
+ },
359
+ {
360
+ "current": 1598.829,
361
+ "min": 1500.0,
362
+ "max": 2250.0
363
+ },
364
+ {
365
+ "current": 1656.082,
366
+ "min": 1500.0,
367
+ "max": 2250.0
368
+ },
369
+ {
370
+ "current": 1661.638,
371
+ "min": 1500.0,
372
+ "max": 2250.0
373
+ },
374
+ {
375
+ "current": 1649.714,
376
+ "min": 1500.0,
377
+ "max": 2250.0
378
+ },
379
+ {
380
+ "current": 1655.626,
381
+ "min": 1500.0,
382
+ "max": 2250.0
383
+ },
384
+ {
385
+ "current": 3027.152,
386
+ "min": 1500.0,
387
+ "max": 2250.0
388
+ },
389
+ {
390
+ "current": 2017.291,
391
+ "min": 1500.0,
392
+ "max": 2250.0
393
+ },
394
+ {
395
+ "current": 2012.466,
396
+ "min": 1500.0,
397
+ "max": 2250.0
398
+ },
399
+ {
400
+ "current": 1990.236,
401
+ "min": 1500.0,
402
+ "max": 2250.0
403
+ },
404
+ {
405
+ "current": 2114.394,
406
+ "min": 1500.0,
407
+ "max": 2250.0
408
+ },
409
+ {
410
+ "current": 2095.433,
411
+ "min": 1500.0,
412
+ "max": 2250.0
413
+ },
414
+ {
415
+ "current": 2096.911,
416
+ "min": 1500.0,
417
+ "max": 2250.0
418
+ },
419
+ {
420
+ "current": 1840.403,
421
+ "min": 1500.0,
422
+ "max": 2250.0
423
+ },
424
+ {
425
+ "current": 2203.59,
426
+ "min": 1500.0,
427
+ "max": 2250.0
428
+ },
429
+ {
430
+ "current": 2185.592,
431
+ "min": 1500.0,
432
+ "max": 2250.0
433
+ },
434
+ {
435
+ "current": 2201.94,
436
+ "min": 1500.0,
437
+ "max": 2250.0
438
+ },
439
+ {
440
+ "current": 2018.775,
441
+ "min": 1500.0,
442
+ "max": 2250.0
443
+ },
444
+ {
445
+ "current": 2200.874,
446
+ "min": 1500.0,
447
+ "max": 2250.0
448
+ },
449
+ {
450
+ "current": 1873.351,
451
+ "min": 1500.0,
452
+ "max": 2250.0
453
+ },
454
+ {
455
+ "current": 3369.243,
456
+ "min": 1500.0,
457
+ "max": 2250.0
458
+ },
459
+ {
460
+ "current": 2235.133,
461
+ "min": 1500.0,
462
+ "max": 2250.0
463
+ },
464
+ {
465
+ "current": 3367.352,
466
+ "min": 1500.0,
467
+ "max": 2250.0
468
+ },
469
+ {
470
+ "current": 3364.254,
471
+ "min": 1500.0,
472
+ "max": 2250.0
473
+ },
474
+ {
475
+ "current": 2202.453,
476
+ "min": 1500.0,
477
+ "max": 2250.0
478
+ },
479
+ {
480
+ "current": 2038.768,
481
+ "min": 1500.0,
482
+ "max": 2250.0
483
+ },
484
+ {
485
+ "current": 2233.914,
486
+ "min": 1500.0,
487
+ "max": 2250.0
488
+ },
489
+ {
490
+ "current": 2182.285,
491
+ "min": 1500.0,
492
+ "max": 2250.0
493
+ },
494
+ {
495
+ "current": 2302.527,
496
+ "min": 1500.0,
497
+ "max": 2250.0
498
+ },
499
+ {
500
+ "current": 2185.247,
501
+ "min": 1500.0,
502
+ "max": 2250.0
503
+ },
504
+ {
505
+ "current": 2132.843,
506
+ "min": 1500.0,
507
+ "max": 2250.0
508
+ },
509
+ {
510
+ "current": 2971.398,
511
+ "min": 1500.0,
512
+ "max": 2250.0
513
+ },
514
+ {
515
+ "current": 2055.752,
516
+ "min": 1500.0,
517
+ "max": 2250.0
518
+ },
519
+ {
520
+ "current": 1567.353,
521
+ "min": 1500.0,
522
+ "max": 2250.0
523
+ },
524
+ {
525
+ "current": 1609.825,
526
+ "min": 1500.0,
527
+ "max": 2250.0
528
+ },
529
+ {
530
+ "current": 1606.242,
531
+ "min": 1500.0,
532
+ "max": 2250.0
533
+ },
534
+ {
535
+ "current": 1610.589,
536
+ "min": 1500.0,
537
+ "max": 2250.0
538
+ },
539
+ {
540
+ "current": 1617.499,
541
+ "min": 1500.0,
542
+ "max": 2250.0
543
+ },
544
+ {
545
+ "current": 2398.668,
546
+ "min": 1500.0,
547
+ "max": 2250.0
548
+ },
549
+ {
550
+ "current": 3020.272,
551
+ "min": 1500.0,
552
+ "max": 2250.0
553
+ },
554
+ {
555
+ "current": 2053.074,
556
+ "min": 1500.0,
557
+ "max": 2250.0
558
+ },
559
+ {
560
+ "current": 2141.083,
561
+ "min": 1500.0,
562
+ "max": 2250.0
563
+ },
564
+ {
565
+ "current": 2182.762,
566
+ "min": 1500.0,
567
+ "max": 2250.0
568
+ },
569
+ {
570
+ "current": 2190.047,
571
+ "min": 1500.0,
572
+ "max": 2250.0
573
+ },
574
+ {
575
+ "current": 2209.589,
576
+ "min": 1500.0,
577
+ "max": 2250.0
578
+ },
579
+ {
580
+ "current": 2072.985,
581
+ "min": 1500.0,
582
+ "max": 2250.0
583
+ },
584
+ {
585
+ "current": 2427.726,
586
+ "min": 1500.0,
587
+ "max": 2250.0
588
+ },
589
+ {
590
+ "current": 2205.137,
591
+ "min": 1500.0,
592
+ "max": 2250.0
593
+ },
594
+ {
595
+ "current": 2197.942,
596
+ "min": 1500.0,
597
+ "max": 2250.0
598
+ },
599
+ {
600
+ "current": 2208.468,
601
+ "min": 1500.0,
602
+ "max": 2250.0
603
+ },
604
+ {
605
+ "current": 3307.897,
606
+ "min": 1500.0,
607
+ "max": 2250.0
608
+ },
609
+ {
610
+ "current": 2210.539,
611
+ "min": 1500.0,
612
+ "max": 2250.0
613
+ },
614
+ {
615
+ "current": 2580.208,
616
+ "min": 1500.0,
617
+ "max": 2250.0
618
+ },
619
+ {
620
+ "current": 2487.416,
621
+ "min": 1500.0,
622
+ "max": 2250.0
623
+ },
624
+ {
625
+ "current": 3360.967,
626
+ "min": 1500.0,
627
+ "max": 2250.0
628
+ },
629
+ {
630
+ "current": 2409.573,
631
+ "min": 1500.0,
632
+ "max": 2250.0
633
+ },
634
+ {
635
+ "current": 2399.184,
636
+ "min": 1500.0,
637
+ "max": 2250.0
638
+ },
639
+ {
640
+ "current": 2400.551,
641
+ "min": 1500.0,
642
+ "max": 2250.0
643
+ },
644
+ {
645
+ "current": 2453.642,
646
+ "min": 1500.0,
647
+ "max": 2250.0
648
+ },
649
+ {
650
+ "current": 1814.426,
651
+ "min": 1500.0,
652
+ "max": 2250.0
653
+ },
654
+ {
655
+ "current": 2440.557,
656
+ "min": 1500.0,
657
+ "max": 2250.0
658
+ },
659
+ {
660
+ "current": 2422.632,
661
+ "min": 1500.0,
662
+ "max": 2250.0
663
+ },
664
+ {
665
+ "current": 1919.862,
666
+ "min": 1500.0,
667
+ "max": 2250.0
668
+ },
669
+ {
670
+ "current": 2519.195,
671
+ "min": 1500.0,
672
+ "max": 2250.0
673
+ },
674
+ {
675
+ "current": 1666.938,
676
+ "min": 1500.0,
677
+ "max": 2250.0
678
+ },
679
+ {
680
+ "current": 1669.157,
681
+ "min": 1500.0,
682
+ "max": 2250.0
683
+ },
684
+ {
685
+ "current": 1794.328,
686
+ "min": 1500.0,
687
+ "max": 2250.0
688
+ },
689
+ {
690
+ "current": 1797.233,
691
+ "min": 1500.0,
692
+ "max": 2250.0
693
+ },
694
+ {
695
+ "current": 1793.218,
696
+ "min": 1500.0,
697
+ "max": 2250.0
698
+ },
699
+ {
700
+ "current": 1790.531,
701
+ "min": 1500.0,
702
+ "max": 2250.0
703
+ }
704
+ ],
705
+ "disk": {
706
+ "/": {
707
+ "total": 1757.8785285949707,
708
+ "used": 1614.0720481872559
709
+ }
710
+ },
711
+ "gpu": "NVIDIA A100-SXM4-80GB",
712
+ "gpu_count": 5,
713
+ "gpu_devices": [
714
+ {
715
+ "name": "NVIDIA A100-SXM4-80GB",
716
+ "memory_total": 85899345920
717
+ },
718
+ {
719
+ "name": "NVIDIA A100-SXM4-80GB",
720
+ "memory_total": 85899345920
721
+ },
722
+ {
723
+ "name": "NVIDIA A100-SXM4-80GB",
724
+ "memory_total": 85899345920
725
+ },
726
+ {
727
+ "name": "NVIDIA DGX Display",
728
+ "memory_total": 4294967296
729
+ },
730
+ {
731
+ "name": "NVIDIA A100-SXM4-80GB",
732
+ "memory_total": 85899345920
733
+ }
734
+ ],
735
+ "memory": {
736
+ "total": 503.5396919250488
737
+ }
738
+ }
wandb/run-20240327_190418-lwtbcr8s/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train/loss": 0.125, "train/grad_norm": 1.3431593179702759, "train/learning_rate": 8.900000000000001e-05, "train/epoch": 4.5, "train/global_step": 1000, "_timestamp": 1711569456.2541177, "_runtime": 6797.689527750015, "_step": 40, "eval/loss": 0.4658036530017853, "eval/wer": 0.43003940353859227, "eval/runtime": 1133.3821, "eval/samples_per_second": 2.755, "eval/steps_per_second": 0.086}
wandb/run-20240327_190418-lwtbcr8s/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240327_190418-lwtbcr8s/logs/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Current SDK version is 0.16.2
2
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Configure stats pid to 1893386
3
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/.config/wandb/settings
4
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/distil-large-v3-hi-ft/wandb/settings
5
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program_abspath': '/home/sanchit/distil-large-v3-hi-ft/run_speech_recognition_seq2seq.py', 'program': 'run_speech_recognition_seq2seq.py'}
8
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:_log_setup():526] Logging user logs to /home/sanchit/distil-large-v3-hi-ft/wandb/run-20240327_190418-lwtbcr8s/logs/debug.log
9
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:_log_setup():527] Logging internal logs to /home/sanchit/distil-large-v3-hi-ft/wandb/run-20240327_190418-lwtbcr8s/logs/debug-internal.log
10
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:init():566] calling init triggers
11
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
12
+ config: {}
13
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:init():616] starting backend
14
+ 2024-03-27 19:04:18,562 INFO MainThread:1893386 [wandb_init.py:init():620] setting up manager
15
+ 2024-03-27 19:04:18,563 INFO MainThread:1893386 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
16
+ 2024-03-27 19:04:18,564 INFO MainThread:1893386 [wandb_init.py:init():628] backend started and connected
17
+ 2024-03-27 19:04:18,568 INFO MainThread:1893386 [wandb_init.py:init():720] updated telemetry
18
+ 2024-03-27 19:04:18,639 INFO MainThread:1893386 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
19
+ 2024-03-27 19:04:18,995 INFO MainThread:1893386 [wandb_run.py:_on_init():2254] communicating current version
20
+ 2024-03-27 19:04:19,034 INFO MainThread:1893386 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.5 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
21
+
22
+ 2024-03-27 19:04:19,034 INFO MainThread:1893386 [wandb_init.py:init():804] starting run threads in backend
23
+ 2024-03-27 19:04:19,118 INFO MainThread:1893386 [wandb_run.py:_console_start():2233] atexit reg
24
+ 2024-03-27 19:04:19,118 INFO MainThread:1893386 [wandb_run.py:_redirect():2088] redirect: wrap_raw
25
+ 2024-03-27 19:04:19,119 INFO MainThread:1893386 [wandb_run.py:_redirect():2153] Wrapping output streams.
26
+ 2024-03-27 19:04:19,119 INFO MainThread:1893386 [wandb_run.py:_redirect():2178] Redirects installed.
27
+ 2024-03-27 19:04:19,119 INFO MainThread:1893386 [wandb_init.py:init():847] run started, returning control to user process
28
+ 2024-03-27 19:04:19,121 INFO MainThread:1893386 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 2, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'distil-whisper/distil-large-v3', 'transformers_version': '4.40.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 5000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Mar27_19-04-06_hf-dgx-01', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 4, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
wandb/run-20240327_190418-lwtbcr8s/run-lwtbcr8s.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0ba05f1e017ca342d3b2973674cb98788723f936678e1306d0f19a09b282999
3
+ size 1430398