othmanfa committed
Commit 39b6e99
1 Parent(s): 6d10665

Training in progress, epoch 1

README.md ADDED
@@ -0,0 +1,55 @@
+ ---
+ license: apache-2.0
+ library_name: peft
+ tags:
+ - generated_from_trainer
+ base_model: tiiuae/falcon-7b
+ model-index:
+ - name: fsttModel
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # fsttModel
+
+ This model is a fine-tuned version of [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b) on an unknown dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0002
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 20
+ - mixed_precision_training: Native AMP
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - PEFT 0.11.1
+ - Transformers 4.41.1
+ - Pytorch 2.1.2
+ - Datasets 2.19.1
+ - Tokenizers 0.19.1
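
The card above leaves usage unspecified, so here is a minimal, hedged sketch of loading this adapter with the framework versions it lists (PEFT 0.11.1, Transformers 4.41.1). The repo id `othmanfa/fsttModel` is taken from the training logs later in this commit; device placement is an assumption:

```python
# A minimal sketch (not from this repo): load the base model, then attach the
# LoRA adapter added by this commit. Requires peft, transformers, accelerate.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",   # base model named in the card above
    device_map="auto",    # assumption: a GPU is available
)
model = PeftModel.from_pretrained(base, "othmanfa/fsttModel")  # adapter repo id from the logs
tok = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")

inputs = tok("Hello,", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=20)
print(tok.decode(out[0], skip_special_tokens=True))
```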
adapter_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "tiiuae/falcon-7b",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 16,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "query_key_value"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
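
The LoRA settings above translate to roughly the following PEFT configuration; this is a sketch of how such an adapter config is typically constructed, not the author's original training script:

```python
# Hedged reconstruction of adapter_config.json as a PEFT LoraConfig;
# only the values come from the file, the variable name is illustrative.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                                # LoRA rank ("r" above)
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["query_key_value"],  # Falcon's fused QKV projection
    task_type="CAUSAL_LM",
)
# Typically applied with: model = get_peft_model(base_model, lora_config)
```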
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f726b0e60d373dc414e88603a56d7102d9585bb9bf270aab3bdfbc620d0619f
+ size 18883912
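
The three lines above are a Git LFS pointer: the repository stores only a sha256 digest and byte size, while the ~18.9 MB adapter weights live in LFS storage. After fetching the real file (e.g. with `git lfs pull` in a clone, an assumed workflow), its digest can be checked against the pointer's oid:

```python
# Sketch: verify a downloaded LFS object against the pointer's oid.
# The local path assumes a checked-out clone of this repo.
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

expected = "4f726b0e60d373dc414e88603a56d7102d9585bb9bf270aab3bdfbc620d0619f"
assert sha256_of("adapter_model.safetensors") == expected
```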
runs/Jun03_17-40-11_f28ebe0d2526/events.out.tfevents.1717436754.f28ebe0d2526.34.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9f8e80ad4a151a5a3a320d4c416b015d3ce136e2d55f2b53290fda909dac8f1e
+ size 5880
runs/Jun03_17-40-11_f28ebe0d2526/events.out.tfevents.1717437875.f28ebe0d2526.34.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d2cebdd6b17cca36ad5ad0a524ca4de1a6e7cd55bed6b3302b0b4e9f88e64053
+ size 12848
runs/Jun03_18-45-28_f28ebe0d2526/events.out.tfevents.1717440337.f28ebe0d2526.34.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a38e602952c3fd87a80023fa875b8f1bdbd3892742969ac75114bf744eb4862a
+ size 16622
runs/Jun03_20-50-07_f28ebe0d2526/events.out.tfevents.1717447816.f28ebe0d2526.34.3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cbd365d0298ce470a44767777c8e3e9e4884d57e8e7c53a68dc5437e64da3d58
+ size 11881
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9615d3520f03a04c2571da0c900c8a98c6874a896688842d49c8bf3b1731df87
+ size 5176
wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log ADDED
@@ -0,0 +1,172 @@
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Current SDK version is 0.17.0
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Configure stats pid to 34
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from /kaggle/working/wandb/settings
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying login settings: {}
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_init.py:_log_setup():520] Logging user logs to /kaggle/working/wandb/run-20240603_175449-d191dh7n/logs/debug.log
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:_log_setup():521] Logging internal logs to /kaggle/working/wandb/run-20240603_175449-d191dh7n/logs/debug-internal.log
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:_jupyter_setup():466] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x78eae9ee9ab0>
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():560] calling init triggers
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():567] wandb.init called with sweep_config: {}
+ config: {}
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():610] starting backend
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():614] setting up manager
+ 2024-06-03 17:54:49,036 INFO MainThread:34 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2024-06-03 17:54:49,038 INFO MainThread:34 [wandb_init.py:init():622] backend started and connected
+ 2024-06-03 17:54:49,052 INFO MainThread:34 [wandb_run.py:_label_probe_notebook():1328] probe notebook
+ 2024-06-03 17:54:49,382 INFO MainThread:34 [wandb_init.py:init():711] updated telemetry
+ 2024-06-03 17:54:49,386 INFO MainThread:34 [wandb_init.py:init():744] communicating run to backend with 90.0 second timeout
+ 2024-06-03 17:54:49,688 INFO MainThread:34 [wandb_run.py:_on_init():2396] communicating current version
+ 2024-06-03 17:54:49,771 INFO MainThread:34 [wandb_run.py:_on_init():2405] got version response
+ 2024-06-03 17:54:49,772 INFO MainThread:34 [wandb_init.py:init():795] starting run threads in backend
+ 2024-06-03 17:55:06,077 INFO MainThread:34 [wandb_run.py:_console_start():2374] atexit reg
+ 2024-06-03 17:55:06,077 INFO MainThread:34 [wandb_run.py:_redirect():2229] redirect: wrap_raw
+ 2024-06-03 17:55:06,078 INFO MainThread:34 [wandb_run.py:_redirect():2294] Wrapping output streams.
+ 2024-06-03 17:55:06,078 INFO MainThread:34 [wandb_run.py:_redirect():2319] Redirects installed.
+ 2024-06-03 17:55:06,081 INFO MainThread:34 [wandb_init.py:init():838] run started, returning control to user process
+ 2024-06-03 17:55:06,087 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 
'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_17-40-11_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
+ 2024-06-03 17:55:07,353 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 17:55:07,353 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 17:56:56,275 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 17:56:56,290 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 17:56:56,290 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 17:56:59,514 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 17:56:59,595 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 17:56:59,595 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 17:57:06,214 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 17:57:06,261 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 17:57:06,261 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:01:57,364 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:01:57,366 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:01:57,366 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:02:16,908 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:02:16,951 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:02:16,952 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:02:46,250 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:02:46,252 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:02:46,252 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:03:47,943 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:03:48,029 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:03:48,029 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:04:13,706 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:04:13,759 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:04:13,759 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:04:26,491 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:04:26,697 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:04:26,697 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:04:34,326 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:04:35,570 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 
'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_17-40-11_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
+ 2024-06-03 18:18:50,784 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:18:50,784 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:41:05,951 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:41:05,953 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:41:05,953 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:45:28,892 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:45:28,927 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:45:28,927 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:45:30,228 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:45:30,229 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:45:30,229 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:45:31,254 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:45:31,276 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:45:31,276 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:45:33,122 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:45:33,358 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 18:45:33,358 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 18:45:36,415 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 18:45:37,683 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 20, 'max_steps': -1, 'lr_scheduler_type': 'linear', 
'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_18-45-28_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
+ 2024-06-03 19:55:43,601 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 19:55:43,602 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 19:56:53,516 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 19:56:55,309 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 19:56:55,309 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:02:21,391 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:02:22,164 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:02:22,164 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:03:12,802 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:03:12,827 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:03:12,827 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:03:22,908 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:03:23,545 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:03:23,546 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:04:16,404 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:04:16,447 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:04:16,447 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:04:32,978 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:04:33,028 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:04:33,028 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:05:18,072 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:05:18,118 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:05:18,118 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:05:31,531 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:05:31,580 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:05:31,580 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:05:44,101 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:05:44,780 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:05:44,780 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:06:37,084 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:06:37,830 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:06:37,830 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:08:59,975 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:09:00,010 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:09:00,010 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:09:06,499 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:09:06,500 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:09:06,500 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:09:07,197 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:09:07,218 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:09:07,218 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:09:18,369 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:09:19,119 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:09:19,120 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:11:52,561 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:11:54,589 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:11:54,589 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:16:50,594 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:19:04,529 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:19:04,530 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:20:33,194 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:20:33,197 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:20:33,197 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:20:53,790 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:24:20,236 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:24:20,236 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:32:45,840 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:32:45,841 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:32:45,841 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:34:19,718 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:34:19,722 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:34:19,722 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:34:21,601 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:34:21,602 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:34:21,602 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:34:23,187 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:37:48,397 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:37:48,397 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:38:33,502 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:41:58,862 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:41:58,862 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:43:51,168 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:43:51,171 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:43:51,171 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:43:53,895 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:47:25,895 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:47:25,895 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:50:07,262 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:50:07,303 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:50:07,303 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:50:09,915 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:50:09,916 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:50:09,917 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:50:10,463 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:50:10,484 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:50:10,484 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:50:13,975 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:50:14,119 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+ 2024-06-03 20:50:14,119 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
+ 2024-06-03 20:50:15,412 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
+ 2024-06-03 20:50:16,872 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 20, 'max_steps': -1, 'lr_scheduler_type': 'linear', 
'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_20-50-07_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
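
The config dicts captured in this log indicate the base model was loaded in 4-bit NF4 with bfloat16 compute before LoRA training. A hedged reconstruction of that loading step, with values copied from the log and everything else assumed:

```python
# Sketch of the 4-bit load implied by quantization_config in the log above;
# the values come from the log, the surrounding code is an assumption.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",
    quantization_config=bnb_config,
    device_map="auto",  # assumption: the Kaggle GPU session seen in the log
)
```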
wandb/run-20240603_175449-d191dh7n/files/conda-environment.yaml ADDED
File without changes
wandb/run-20240603_175449-d191dh7n/files/config.yaml ADDED
@@ -0,0 +1,710 @@
+ wandb_version: 1
+
+ _wandb:
+   desc: null
+   value:
+     python_version: 3.10.13
+     cli_version: 0.17.0
+     framework: huggingface
+     huggingface_version: 4.41.1
+     is_jupyter_run: true
+     is_kaggle_kernel: true
+     start_time: 1717437289
+     t:
+       1:
+       - 1
+       - 2
+       - 3
+       - 5
+       - 11
+       - 12
+       - 49
+       - 51
+       - 53
+       - 55
+       - 71
+       - 98
+       - 105
+       2:
+       - 1
+       - 2
+       - 3
+       - 5
+       - 11
+       - 12
+       - 49
+       - 51
+       - 53
+       - 55
+       - 71
+       - 98
+       - 105
+       3:
+       - 7
+       - 13
+       - 23
+       - 62
+       - 66
+       4: 3.10.13
+       5: 0.17.0
+       6: 4.41.1
+       8:
+       - 1
+       - 2
+       - 5
+       9:
+         1: transformers_trainer
+         13: linux-x86_64
+       m:
+       - 1: train/global_step
+         6:
+         - 3
+       - 1: train/loss
+         5: 1
+         6:
+         - 1
+       - 1: train/grad_norm
+         5: 1
+         6:
+         - 1
+       - 1: train/learning_rate
+         5: 1
+         6:
+         - 1
+       - 1: train/epoch
+         5: 1
+         6:
+         - 1
+ vocab_size:
+   desc: null
+   value: 65024
+ hidden_size:
+   desc: null
+   value: 4544
+ num_hidden_layers:
+   desc: null
+   value: 32
+ num_attention_heads:
+   desc: null
+   value: 71
+ layer_norm_epsilon:
+   desc: null
+   value: 1.0e-05
+ initializer_range:
+   desc: null
+   value: 0.02
+ use_cache:
+   desc: null
+   value: false
+ hidden_dropout:
+   desc: null
+   value: 0.0
+ attention_dropout:
+   desc: null
+   value: 0.0
+ bos_token_id:
+   desc: null
+   value: 11
+ eos_token_id:
+   desc: null
+   value: 11
+ num_kv_heads:
+   desc: null
+   value: 71
+ alibi:
+   desc: null
+   value: false
+ new_decoder_architecture:
+   desc: null
+   value: false
+ multi_query:
+   desc: null
+   value: true
+ parallel_attn:
+   desc: null
+   value: true
+ bias:
+   desc: null
+   value: false
+ return_dict:
+   desc: null
+   value: true
+ output_hidden_states:
+   desc: null
+   value: false
+ output_attentions:
+   desc: null
+   value: false
+ torchscript:
+   desc: null
+   value: false
+ torch_dtype:
+   desc: null
+   value: bfloat16
+ use_bfloat16:
+   desc: null
+   value: false
+ tf_legacy_loss:
+   desc: null
+   value: false
+ pruned_heads:
+   desc: null
+   value: {}
+ tie_word_embeddings:
+   desc: null
+   value: true
+ chunk_size_feed_forward:
+   desc: null
+   value: 0
+ is_encoder_decoder:
+   desc: null
+   value: false
+ is_decoder:
+   desc: null
+   value: false
+ cross_attention_hidden_size:
+   desc: null
+   value: null
+ add_cross_attention:
+   desc: null
+   value: false
+ tie_encoder_decoder:
+   desc: null
+   value: false
+ max_length:
+   desc: null
+   value: 20
+ min_length:
+   desc: null
+   value: 0
+ do_sample:
+   desc: null
+   value: false
+ early_stopping:
+   desc: null
+   value: false
+ num_beams:
+   desc: null
+   value: 1
+ num_beam_groups:
+   desc: null
+   value: 1
+ diversity_penalty:
+   desc: null
+   value: 0.0
+ temperature:
+   desc: null
+   value: 1.0
+ top_k:
+   desc: null
+   value: 50
+ top_p:
+   desc: null
+   value: 1.0
+ typical_p:
+   desc: null
+   value: 1.0
+ repetition_penalty:
+   desc: null
+   value: 1.0
+ length_penalty:
+   desc: null
+   value: 1.0
+ no_repeat_ngram_size:
+   desc: null
+   value: 0
+ encoder_no_repeat_ngram_size:
+   desc: null
+   value: 0
+ bad_words_ids:
+   desc: null
+   value: null
+ num_return_sequences:
+   desc: null
+   value: 1
+ output_scores:
+   desc: null
+   value: false
+ return_dict_in_generate:
+   desc: null
+   value: false
+ forced_bos_token_id:
+   desc: null
+   value: null
+ forced_eos_token_id:
+   desc: null
+   value: null
+ remove_invalid_values:
+   desc: null
+   value: false
+ exponential_decay_length_penalty:
+   desc: null
+   value: null
+ suppress_tokens:
+   desc: null
+   value: null
+ begin_suppress_tokens:
+   desc: null
+   value: null
+ architectures:
+   desc: null
+   value:
+   - FalconForCausalLM
+ finetuning_task:
+   desc: null
+   value: null
+ id2label:
+   desc: null
+   value:
+     '0': LABEL_0
+     '1': LABEL_1
+ label2id:
+   desc: null
+   value:
+     LABEL_0: 0
+     LABEL_1: 1
+ tokenizer_class:
+   desc: null
+   value: null
+ prefix:
+   desc: null
+   value: null
+ pad_token_id:
+   desc: null
+   value: null
+ sep_token_id:
+   desc: null
+   value: null
+ decoder_start_token_id:
+   desc: null
+   value: null
+ task_specific_params:
+   desc: null
+   value: null
+ problem_type:
+   desc: null
+   value: null
+ _name_or_path:
+   desc: null
+   value: tiiuae/falcon-7b
+ transformers_version:
+   desc: null
+   value: 4.41.1
+ apply_residual_connection_post_layernorm:
+   desc: null
+   value: false
+ auto_map:
+   desc: null
+   value:
+     AutoConfig: tiiuae/falcon-7b--configuration_falcon.FalconConfig
+     AutoModel: tiiuae/falcon-7b--modeling_falcon.FalconModel
+     AutoModelForSequenceClassification: tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification
+     AutoModelForTokenClassification: tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification
+     AutoModelForQuestionAnswering: tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering
+     AutoModelForCausalLM: tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM
+ model_type:
+   desc: null
+   value: falcon
+ quantization_config:
+   desc: null
+   value:
+     quant_method: QuantizationMethod.BITS_AND_BYTES
+     _load_in_8bit: false
+     _load_in_4bit: true
+     llm_int8_threshold: 6.0
+     llm_int8_skip_modules: null
+     llm_int8_enable_fp32_cpu_offload: false
+     llm_int8_has_fp16_weight: false
+     bnb_4bit_quant_type: nf4
+     bnb_4bit_use_double_quant: false
+     bnb_4bit_compute_dtype: bfloat16
+     bnb_4bit_quant_storage: uint8
+     load_in_4bit: true
+     load_in_8bit: false
+ output_dir:
+   desc: null
+   value: /kaggle/working/
+ overwrite_output_dir:
+   desc: null
+   value: false
+ do_train:
+   desc: null
+   value: false
+ do_eval:
+   desc: null
+   value: false
+ do_predict:
+   desc: null
+   value: false
+ eval_strategy:
+   desc: null
+   value: 'no'
+ prediction_loss_only:
+   desc: null
+   value: false
+ per_device_train_batch_size:
+   desc: null
+   value: 8
+ per_device_eval_batch_size:
+   desc: null
+   value: 8
+ per_gpu_train_batch_size:
+   desc: null
+   value: null
+ per_gpu_eval_batch_size:
+   desc: null
+   value: null
+ gradient_accumulation_steps:
+   desc: null
+   value: 1
+ eval_accumulation_steps:
+   desc: null
+   value: null
+ eval_delay:
+   desc: null
+   value: 0
+ learning_rate:
+   desc: null
+   value: 0.0002
+ weight_decay:
+   desc: null
+   value: 0.0
+ adam_beta1:
+   desc: null
+   value: 0.9
+ adam_beta2:
+   desc: null
+   value: 0.999
+ adam_epsilon:
+   desc: null
+   value: 1.0e-08
+ max_grad_norm:
+   desc: null
+   value: 1.0
+ num_train_epochs:
+   desc: null
+   value: 20
+ max_steps:
+   desc: null
+   value: -1
+ lr_scheduler_type:
+   desc: null
+   value: linear
+ lr_scheduler_kwargs:
+   desc: null
+   value: {}
+ warmup_ratio:
+   desc: null
+   value: 0.0
+ warmup_steps:
+   desc: null
+   value: 0
+ log_level:
+   desc: null
+   value: passive
+ log_level_replica:
+   desc: null
+   value: warning
+ log_on_each_node:
+   desc: null
+   value: true
+ logging_dir:
+   desc: null
+   value: /kaggle/working/runs/Jun03_20-50-07_f28ebe0d2526
+ logging_strategy:
+   desc: null
+   value: steps
+ logging_first_step:
+   desc: null
+   value: false
+ logging_steps:
+   desc: null
+   value: 10
+ logging_nan_inf_filter:
+   desc: null
+   value: true
+ save_strategy:
+   desc: null
+   value: epoch
+ save_steps:
+   desc: null
+   value: 500
+ save_total_limit:
+   desc: null
+   value: 4
+ save_safetensors:
+   desc: null
+   value: true
+ save_on_each_node:
+   desc: null
+   value: false
+ save_only_model:
+   desc: null
+   value: false
+ restore_callback_states_from_checkpoint:
+   desc: null
+   value: false
+ no_cuda:
+   desc: null
+   value: false
+ use_cpu:
+   desc: null
+   value: false
+ use_mps_device:
+   desc: null
+   value: false
+ seed:
+   desc: null
+   value: 42
+ data_seed:
+   desc: null
+   value: null
+ jit_mode_eval:
+   desc: null
+   value: false
+ use_ipex:
+   desc: null
+   value: false
+ bf16:
+   desc: null
+   value: false
+ fp16:
+   desc: null
+   value: true
+ fp16_opt_level:
+   desc: null
+   value: O1
+ half_precision_backend:
+   desc: null
+   value: auto
+ bf16_full_eval:
+   desc: null
+   value: false
+ fp16_full_eval:
+   desc: null
+   value: false
+ tf32:
+   desc: null
+   value: null
+ local_rank:
+   desc: null
+   value: 0
+ ddp_backend:
+   desc: null
+   value: null
+ tpu_num_cores:
+   desc: null
+   value: null
+ tpu_metrics_debug:
+   desc: null
+   value: false
+ debug:
+   desc: null
+   value: []
+ dataloader_drop_last:
+   desc: null
+   value: false
+ eval_steps:
+   desc: null
+   value: null
+ dataloader_num_workers:
+   desc: null
+   value: 0
+ dataloader_prefetch_factor:
+   desc: null
+   value: null
+ past_index:
+   desc: null
+   value: -1
+ run_name:
+   desc: null
+   value: /kaggle/working/
+ disable_tqdm:
+   desc: null
+   value: false
+ remove_unused_columns:
+   desc: null
+   value: true
+ label_names:
+   desc: null
+   value: null
+ load_best_model_at_end:
+   desc: null
+   value: false
+ metric_for_best_model:
+   desc: null
+ value: null
537
+ greater_is_better:
538
+ desc: null
539
+ value: null
540
+ ignore_data_skip:
541
+ desc: null
542
+ value: false
543
+ fsdp:
544
+ desc: null
545
+ value: []
546
+ fsdp_min_num_params:
547
+ desc: null
548
+ value: 0
549
+ fsdp_config:
550
+ desc: null
551
+ value:
552
+ min_num_params: 0
553
+ xla: false
554
+ xla_fsdp_v2: false
555
+ xla_fsdp_grad_ckpt: false
556
+ fsdp_transformer_layer_cls_to_wrap:
557
+ desc: null
558
+ value: null
559
+ accelerator_config:
560
+ desc: null
561
+ value:
562
+ split_batches: false
563
+ dispatch_batches: null
564
+ even_batches: true
565
+ use_seedable_sampler: true
566
+ non_blocking: false
567
+ gradient_accumulation_kwargs: null
568
+ deepspeed:
569
+ desc: null
570
+ value: null
571
+ label_smoothing_factor:
572
+ desc: null
573
+ value: 0.0
574
+ optim:
575
+ desc: null
576
+ value: adamw_torch
577
+ optim_args:
578
+ desc: null
579
+ value: null
580
+ adafactor:
581
+ desc: null
582
+ value: false
583
+ group_by_length:
584
+ desc: null
585
+ value: false
586
+ length_column_name:
587
+ desc: null
588
+ value: length
589
+ report_to:
590
+ desc: null
591
+ value:
592
+ - tensorboard
593
+ - wandb
594
+ ddp_find_unused_parameters:
595
+ desc: null
596
+ value: null
597
+ ddp_bucket_cap_mb:
598
+ desc: null
599
+ value: null
600
+ ddp_broadcast_buffers:
601
+ desc: null
602
+ value: null
603
+ dataloader_pin_memory:
604
+ desc: null
605
+ value: true
606
+ dataloader_persistent_workers:
607
+ desc: null
608
+ value: false
609
+ skip_memory_metrics:
610
+ desc: null
611
+ value: true
612
+ use_legacy_prediction_loop:
613
+ desc: null
614
+ value: false
615
+ push_to_hub:
616
+ desc: null
617
+ value: true
618
+ resume_from_checkpoint:
619
+ desc: null
620
+ value: null
621
+ hub_model_id:
622
+ desc: null
623
+ value: othmanfa/fsttModel
624
+ hub_strategy:
625
+ desc: null
626
+ value: every_save
627
+ hub_token:
628
+ desc: null
629
+ value: <HUB_TOKEN>
630
+ hub_private_repo:
631
+ desc: null
632
+ value: false
633
+ hub_always_push:
634
+ desc: null
635
+ value: false
636
+ gradient_checkpointing:
637
+ desc: null
638
+ value: false
639
+ gradient_checkpointing_kwargs:
640
+ desc: null
641
+ value: null
642
+ include_inputs_for_metrics:
643
+ desc: null
644
+ value: false
645
+ eval_do_concat_batches:
646
+ desc: null
647
+ value: true
648
+ fp16_backend:
649
+ desc: null
650
+ value: auto
651
+ evaluation_strategy:
652
+ desc: null
653
+ value: null
654
+ push_to_hub_model_id:
655
+ desc: null
656
+ value: null
657
+ push_to_hub_organization:
658
+ desc: null
659
+ value: null
660
+ push_to_hub_token:
661
+ desc: null
662
+ value: <PUSH_TO_HUB_TOKEN>
663
+ mp_parameters:
664
+ desc: null
665
+ value: ''
666
+ auto_find_batch_size:
667
+ desc: null
668
+ value: true
669
+ full_determinism:
670
+ desc: null
671
+ value: false
672
+ torchdynamo:
673
+ desc: null
674
+ value: null
675
+ ray_scope:
676
+ desc: null
677
+ value: last
678
+ ddp_timeout:
679
+ desc: null
680
+ value: 1800
681
+ torch_compile:
682
+ desc: null
683
+ value: false
684
+ torch_compile_backend:
685
+ desc: null
686
+ value: null
687
+ torch_compile_mode:
688
+ desc: null
689
+ value: null
690
+ dispatch_batches:
691
+ desc: null
692
+ value: null
693
+ split_batches:
694
+ desc: null
695
+ value: null
696
+ include_tokens_per_second:
697
+ desc: null
698
+ value: false
699
+ include_num_input_tokens_seen:
700
+ desc: null
701
+ value: false
702
+ neftune_noise_alpha:
703
+ desc: null
704
+ value: null
705
+ optim_target_modules:
706
+ desc: null
707
+ value: null
708
+ batch_eval_metrics:
709
+ desc: null
710
+ value: false
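
The `quantization_config` block recorded above indicates the Falcon-7B base was loaded in 4-bit NF4 with bfloat16 compute and no double quantization, and the remainder of the config captures the Trainer settings. As a reference, here is a minimal sketch of load and training-argument calls that match those recorded values; the training script itself is not part of this commit, and `device_map="auto"` and `trust_remote_code=True` are assumptions that are not captured in the config:

```python
# Minimal sketch: load tiiuae/falcon-7b with the 4-bit settings recorded
# in the wandb config above.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load_in_4bit: true
    bnb_4bit_quant_type="nf4",              # bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant=False,        # bnb_4bit_use_double_quant: false
    bnb_4bit_compute_dtype=torch.bfloat16,  # bnb_4bit_compute_dtype: bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",                     # _name_or_path in the config
    quantization_config=bnb_config,
    device_map="auto",                      # assumption: not recorded above
    trust_remote_code=True,                 # assumption: Falcon custom code via auto_map
)

# The non-default Trainer values recorded above, reassembled for reference.
args = TrainingArguments(
    output_dir="/kaggle/working/",
    per_device_train_batch_size=8,
    learning_rate=2e-4,
    num_train_epochs=20,
    lr_scheduler_type="linear",
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=4,
    fp16=True,
    auto_find_batch_size=True,
    push_to_hub=True,
    hub_model_id="othmanfa/fsttModel",
    hub_strategy="every_save",
    report_to=["tensorboard", "wandb"],
)
```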
wandb/run-20240603_175449-d191dh7n/files/output.log ADDED
@@ -0,0 +1,223 @@
+ /tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
+ return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
+ /opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
+ warnings.warn(
+ /opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
+ warnings.warn(
+ [... the three warnings above repeat verbatim between training steps; duplicate lines trimmed ...]
+ 2
+ [... further verbatim repeats of the same three warnings trimmed ...]
+ /opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1659: UserWarning: You are calling .generate() with the `input_ids` being on a device type different than your model's device. `input_ids` is on cpu, whereas the model is on cuda. You may experience unexpected behaviors or slower generation. Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('cuda') before running `.generate()`.
+ warnings.warn(
+ /opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None
+ warnings.warn(
+ /opt/conda/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:515: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
+ warnings.warn(
+ The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
+ Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
+ quel est le type de formation de filiere Analyse Appliquée et Ingénierie Statistique (AAS)?
+ Bonjour,
+ Je suis actuellement en 2ème année de licence mathématiques et je souhaite intégrer une formation d'Analyse Appliquée et Ingénierie Statistique.
+ J'ai entendu dire que cette formation était très difficile et qu'il fallait avoir un très bon niveau en mathématiques.
+ Est
+ The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
+ Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
+ Type Formation: master
+ Nom Filiere: Analyse Appliquée et Ingénierie Statistique
+ Objectifs:
+ Le Master Analyse Appliquée et Ingénierie Statistique (MAAIS) de l'Université Abdelmalek Essaâdi a pour objectif de former des ingénieurs capables d’apporter une réponse scientifique et technologique à des questions issues de secteurs différents (Industrie, Finance, Santé, Informatique, Marketing, etc.). Les ingénieurs diplômés de la filière MAAIS seront reconnus pour leur capacité d’innovation, leur esprit d’entreprise et leur ouverture sur le monde.
+ Avec la multiplication des données issues de l’internet des objets, de l’analyse du comportement des consommateurs, des données médicales, des systèmes de trading haute fréquence etc., et la nécessité d’intégrer des technologies analyt
+ The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
+ Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
+ Type Formation: master
+ Nom Filiere: Analyse Appliquée et Ingénierie Statistique
+ Programme:
+ Le cursus mathématiques appliquée et statistique vise à former des ingénieurs capables de faire la synthèse entre les mathématiques et les applications avec comme objectifs :
+ - De former des ingénieurs polyvalents, ayant des compétences en Mathématiques, et en même temps aptes à faire face à des problèmes de type scientifique et industrielle
+ - D’enseigner des mathématiques appliquées et statistique à l’enseignement secondaire
+ Programme:
+ Mathématiques et Calcul Scientifique : : MASP1 (développement d’applications sur ordinateur et méthodes de résolution des équations et optimisation) : MASP2 (probabilités, théorie des
+ Time to retrieve answer: 205.20784872600052
+ The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
+ Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
+ Type Formation: master
+ Nom Filiere: Analyse Appliquée et Ingénierie Statistique
+ Programme:
+ Nom Module:
+ Options: Communication de la recherche
+ Analyse statistique
+ Modélisation
+ Data Mining
+ Analyse de données
+ Optimisation
+ Data Science
+ Entreprendre son projet
+
+ Date: 01 sept 2022 au 31 août 2024 semestre 2 : Analyse statistique
+ et données numériques
+ et outils
+ d’optimisation
+ et d’estimation
+ Time to retrieve answer: 205.3583044219995
+ The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
+ Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
+ le nom de coordinateur de master Analyse Appliquée et Ingénierie Statistique est M.
+ El-Gharbawi. Il a obtenu son doctorat en mathématiques appliquées et sa spécialisation est l’Analyse des Données en 2007 à l’université de Montréal au Canada. Il a aussi obtenu un diplôme d’expert en données massives en 2019.
+ Il a également obtenu un diplôme de master en Analyse des données massives en 2022.
+ Il a également obtenu un master en statistiques et analyse des données en 2022.
+ Il a également obtenu un master en statistique des données massives en 2022.
+ Enfin, il a obtenu un master en analyse des données massives en 2022.
+ Il est également responsable de l’équipe de recherche en Analyse des données massives.
+ Il est également responsable
+ Time to retrieve answer: 211.99833258299986
+ [... the tensor-copy, checkpoint, and resume_download warnings resume and repeat through the end of the file; duplicate lines trimmed ...]
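
The generation warnings in this log (`input_ids` on CPU while the model is on CUDA, a missing `attention_mask` and `pad_token_id`, and `temperature=0.7` with `do_sample=False`) all have one-line fixes. Here is a minimal sketch of a `generate()` call that would avoid them; the `model` variable is assumed to be the already-loaded Falcon model from the sketch above, and `max_new_tokens=200` is an illustrative value, not something recorded in this run:

```python
# Minimal sketch addressing the warnings logged above; `model` is assumed
# to be the already-loaded (quantized) Falcon model.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
prompt = "quel est le type de formation de filiere Analyse Appliquée et Ingénierie Statistique (AAS)?"

# Move the encoded inputs to the model's device to avoid the cpu/cuda mismatch warning.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],  # silences the attention-mask warning
    pad_token_id=tokenizer.eos_token_id,      # silences "Setting `pad_token_id`..."
    do_sample=True,                           # required for `temperature` to take effect
    temperature=0.7,
    max_new_tokens=200,                       # assumption: illustrative value
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```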
wandb/run-20240603_175449-d191dh7n/files/requirements.txt ADDED
@@ -0,0 +1,870 @@
+ ==
+ Babel==2.14.0
+ Boruta==0.3
+ Brotli==1.1.0
+ CVXcanon==0.1.2
+ Cartopy==0.23.0
+ Cython==3.0.8
+ Deprecated==1.2.14
+ Farama-Notifications==0.0.4
+ Flask==3.0.3
+ Geohash==1.0
+ GitPython==3.1.41
+ ImageHash==4.3.1
+ Janome==0.5.0
+ Jinja2==3.1.2
+ LunarCalendar==0.0.9
+ Mako==1.3.5
+ Markdown==3.5.2
+ MarkupSafe==2.1.3
+ MarkupSafe==2.1.5
+ Pillow==9.5.0
+ PuLP==2.8.0
+ PyArabic==0.6.15
+ PyJWT==2.8.0
+ PyMeeus==0.5.12
+ PySocks==1.7.1
+ PyUpSet==0.1.1.post7
+ PyWavelets==1.5.0
+ PyYAML==6.0.1
+ Pygments==2.17.2
+ Pympler==1.0.1
+ QtPy==2.4.1
+ Rtree==1.2.0
+ SQLAlchemy==2.0.25
+ SecretStorage==3.3.3
+ Send2Trash==1.8.2
+ Shapely==1.8.5.post1
+ Shimmy==1.3.0
+ SimpleITK==2.3.1
+ TPOT==0.12.1
+ Theano-PyMC==1.1.2
+ Theano==1.0.5
+ Wand==0.6.13
+ Werkzeug==3.0.3
+ absl-py==1.4.0
+ accelerate==0.30.1
+ access==1.1.9
+ affine==2.4.0
+ aiobotocore==2.13.0
+ aiofiles==22.1.0
+ aiohttp-cors==0.7.0
+ aiohttp==3.9.5
+ aioitertools==0.11.0
+ aiorwlock==1.3.0
+ aiosignal==1.3.1
+ aiosqlite==0.19.0
+ albumentations==1.4.0
+ alembic==1.13.1
+ altair==5.3.0
+ annotated-types==0.6.0
+ annotated-types==0.7.0
+ annoy==1.17.3
+ anyio==4.2.0
+ apache-beam==2.46.0
+ aplus==0.11.0
+ appdirs==1.4.4
+ archspec==0.2.3
+ argon2-cffi-bindings==21.2.0
+ argon2-cffi==23.1.0
+ array-record==0.5.0
+ arrow==1.3.0
+ arviz==0.18.0
+ astroid==3.2.2
+ astropy-iers-data==0.2024.5.27.0.30.8
+ astropy==6.1.0
+ asttokens==2.4.1
+ astunparse==1.6.3
+ async-lru==2.0.4
+ async-timeout==4.0.3
+ attrs==23.2.0
+ audioread==3.0.1
+ autopep8==2.0.4
+ backoff==2.2.1
+ bayesian-optimization==1.4.3
+ beatrix_jupyterlab==2023.128.151533
+ beautifulsoup4==4.12.2
+ bitsandbytes==0.43.1
+ blake3==0.2.1
+ bleach==6.1.0
+ blessed==1.20.0
+ blinker==1.8.2
+ blis==0.7.10
+ blosc2==2.6.2
+ bokeh==3.4.1
+ boltons==23.1.1
+ boto3==1.26.100
+ botocore==1.34.106
+ bq_helper==0.4.1
+ bqplot==0.12.43
+ branca==0.7.2
+ brewer2mpl==1.4.1
+ brotlipy==0.7.0
+ cached-property==1.5.2
+ cachetools==4.2.4
+ cachetools==5.3.2
+ catalogue==2.0.10
+ catalyst==22.4
+ catboost==1.2.5
+ category-encoders==2.6.3
+ certifi==2024.2.2
+ cesium==0.12.1
+ cffi==1.16.0
+ charset-normalizer==3.3.2
+ chex==0.1.86
+ cleverhans==4.0.0
+ click-plugins==1.1.1
+ click==8.1.7
+ cligj==0.7.2
+ cloud-tpu-client==0.10
+ cloud-tpu-profiler==2.4.0
+ cloudpathlib==0.16.0
+ cloudpickle==2.2.1
+ cloudpickle==3.0.0
+ cmdstanpy==1.2.2
+ colorama==0.4.6
+ colorcet==3.1.0
+ colorful==0.5.6
+ colorlog==6.8.2
+ colorlover==0.3.0
+ comm==0.2.1
+ conda-libmamba-solver==23.12.0
+ conda-package-handling==2.2.0
+ conda==24.5.0
+ conda_package_streaming==0.9.0
+ confection==0.1.4
+ contextily==1.6.0
+ contourpy==1.2.0
+ contourpy==1.2.1
+ convertdate==2.4.0
+ crcmod==1.7
+ cryptography==41.0.7
+ cuda-python==12.5.0
+ cudf==24.4.1
+ cufflinks==0.17.3
+ cuml==24.4.0
+ cupy==13.1.0
+ cycler==0.12.1
+ cymem==2.0.8
+ cytoolz==0.12.3
+ daal4py==2024.4.0
+ daal==2024.4.0
+ dacite==1.8.1
+ dask-cuda==24.4.0
+ dask-cudf==24.4.1
+ dask-expr==1.1.1
+ dask==2024.5.1
+ dataclasses-json==0.6.6
+ dataproc_jupyter_plugin==0.1.66
+ datasets==2.19.1
+ datashader==0.16.1
+ datatile==1.0.3
+ db-dtypes==1.2.0
+ deap==1.4.1
+ debugpy==1.8.0
+ decorator==5.1.1
+ deepdiff==7.0.1
+ defusedxml==0.7.1
+ deprecation==2.1.0
+ descartes==1.1.0
+ dill==0.3.8
+ dipy==1.9.0
+ distlib==0.3.8
+ distributed==2024.1.1
+ distro==1.9.0
+ dm-tree==0.1.8
+ docker-pycreds==0.4.0
+ docker==7.0.0
+ docopt==0.6.2
+ docstring-parser==0.15
+ docstring-to-markdown==0.15
+ docutils==0.21.2
+ earthengine-api==0.1.404
+ easydict==1.13
+ easyocr==1.7.1
+ ecos==2.0.13
+ eli5==0.13.0
+ emoji==2.12.1
+ en-core-web-lg==3.7.1
+ en-core-web-sm==3.7.1
+ entrypoints==0.4
+ ephem==4.1.5
+ esda==2.5.1
+ essentia==2.1b6.dev1110
+ et-xmlfile==1.1.0
+ etils==1.6.0
+ exceptiongroup==1.2.0
+ executing==2.0.1
+ explainable-ai-sdk==1.3.3
+ fastai==2.7.15
+ fastapi==0.108.0
+ fastavro==1.9.3
+ fastcore==1.5.41
+ fastdownload==0.0.7
+ fasteners==0.19
+ fastjsonschema==2.19.1
+ fastprogress==1.0.3
+ fastrlock==0.8.2
+ fasttext==0.9.2
+ feather-format==0.4.1
+ featuretools==1.31.0
+ filelock==3.13.1
+ fiona==1.9.6
+ fitter==1.7.0
+ flake8==7.0.0
+ flashtext==2.7
+ flatbuffers==23.5.26
+ flax==0.8.4
+ folium==0.16.0
+ fonttools==4.47.0
+ fonttools==4.52.4
+ fqdn==1.5.1
+ frozendict==2.4.4
+ frozenlist==1.4.1
+ fsspec==2024.3.1
+ fsspec==2024.5.0
+ funcy==2.0
+ fury==0.10.0
+ future==1.0.0
+ fuzzywuzzy==0.18.0
+ gast==0.5.4
+ gatspy==0.3
+ gcsfs==2024.3.1
+ gensim==4.3.2
+ geographiclib==2.0
+ geojson==3.1.0
+ geopandas==0.14.4
+ geoplot==0.5.1
+ geopy==2.4.1
+ geoviews==1.12.0
+ ggplot==0.11.5
+ giddy==2.3.5
+ gitdb==4.0.11
+ google-ai-generativelanguage==0.6.4
+ google-api-core==2.11.1
+ google-api-core==2.19.0
+ google-api-python-client==2.131.0
+ google-apitools==0.5.31
+ google-auth-httplib2==0.2.0
+ google-auth-oauthlib==1.2.0
+ google-auth==2.26.1
+ google-cloud-aiplatform==0.6.0a1
+ google-cloud-artifact-registry==1.10.0
+ google-cloud-automl==1.0.1
+ google-cloud-bigquery==2.34.4
+ google-cloud-bigtable==1.7.3
+ google-cloud-core==2.4.1
+ google-cloud-datastore==2.19.0
+ google-cloud-dlp==3.14.0
+ google-cloud-jupyter-config==0.0.5
+ google-cloud-language==2.13.3
+ google-cloud-monitoring==2.18.0
+ google-cloud-pubsub==2.19.0
+ google-cloud-pubsublite==1.9.0
+ google-cloud-recommendations-ai==0.7.1
+ google-cloud-resource-manager==1.11.0
+ google-cloud-spanner==3.40.1
+ google-cloud-storage==1.44.0
+ google-cloud-translate==3.12.1
+ google-cloud-videointelligence==2.13.3
+ google-cloud-vision==2.8.0
+ google-crc32c==1.5.0
+ google-generativeai==0.5.4
+ google-pasta==0.2.0
+ google-resumable-media==2.7.0
+ googleapis-common-protos==1.62.0
+ gplearn==0.4.2
+ gpustat==1.0.0
+ gpxpy==1.6.2
+ graphviz==0.20.3
+ greenlet==3.0.3
+ grpc-google-iam-v1==0.12.7
+ grpcio-status==1.48.1
+ grpcio-status==1.48.2
+ grpcio==1.59.3
+ grpcio==1.60.0
+ gviz-api==1.10.0
+ gym-notices==0.0.8
+ gym==0.26.2
+ gymnasium==0.29.0
+ h11==0.14.0
+ h2o==3.46.0.2
+ h5netcdf==1.3.0
+ h5py==3.10.0
+ haversine==2.8.1
+ hdfs==2.7.3
+ hep-ml==0.7.2
+ hijri-converter==2.3.1
+ hmmlearn==0.3.2
+ holidays==0.24
+ holoviews==1.18.3
+ hpsklearn==0.1.0
+ html5lib==1.1
+ htmlmin==0.1.12
+ httpcore==1.0.5
+ httplib2==0.21.0
+ httptools==0.6.1
+ httpx==0.27.0
+ huggingface-hub==0.23.2
+ hunspell==0.5.5
+ hydra-slayer==0.5.0
+ hyperopt==0.2.7
+ hypertools==0.8.0
+ idna==3.6
+ igraph==0.11.5
+ imagecodecs==2024.1.1
+ imageio==2.33.1
+ imbalanced-learn==0.12.3
+ imgaug==0.4.0
+ importlib-metadata==6.11.0
+ importlib-metadata==7.0.1
+ importlib-resources==6.1.1
+ inequality==1.0.1
+ iniconfig==2.0.0
+ ipydatawidgets==4.3.5
+ ipykernel==6.28.0
+ ipyleaflet==0.19.1
+ ipympl==0.7.0
+ ipython-genutils==0.2.0
+ ipython-genutils==0.2.0
+ ipython-sql==0.5.0
+ ipython==8.20.0
+ ipyvolume==0.6.3
+ ipyvue==1.11.1
+ ipyvuetify==1.9.4
+ ipywebrtc==0.6.0
+ ipywidgets==7.7.1
+ isoduration==20.11.0
+ isort==5.13.2
+ isoweek==1.3.3
+ itsdangerous==2.2.0
+ jaraco.classes==3.3.0
+ jax-jumpy==1.0.0
+ jax==0.4.26
+ jaxlib==0.4.26.dev20240504
+ jedi==0.19.1
+ jeepney==0.8.0
+ jieba==0.42.1
+ jmespath==1.0.1
+ joblib==1.4.2
+ json5==0.9.14
+ jsonpatch==1.33
+ jsonpointer==2.4
+ jsonschema-specifications==2023.12.1
+ jsonschema==4.20.0
+ jupyter-console==6.6.3
+ jupyter-events==0.9.0
+ jupyter-http-over-ws==0.0.8
+ jupyter-leaflet==0.19.1
+ jupyter-lsp==1.5.1
+ jupyter-server-mathjax==0.2.6
+ jupyter-ydoc==0.2.5
+ jupyter_client==7.4.9
+ jupyter_client==8.6.0
+ jupyter_core==5.7.1
+ jupyter_server==2.12.5
+ jupyter_server_fileid==0.9.1
+ jupyter_server_proxy==4.1.0
+ jupyter_server_terminals==0.5.1
+ jupyter_server_ydoc==0.8.0
+ jupyterlab-lsp==5.1.0
+ jupyterlab-widgets==3.0.9
+ jupyterlab==4.2.1
+ jupyterlab_git==0.44.0
+ jupyterlab_pygments==0.3.0
+ jupyterlab_server==2.27.2
+ jupytext==1.16.0
+ kaggle-environments==1.14.9
+ kaggle==1.6.14
+ kagglehub==0.2.5
+ keras-cv==0.9.0
+ keras-nlp==0.12.1
+ keras-tuner==1.4.6
+ keras==3.3.3
+ kernels-mixer==0.0.7
+ keyring==24.3.0
+ keyrings.google-artifactregistry-auth==1.1.2
+ kfp-pipeline-spec==0.2.2
+ kfp-server-api==2.0.5
+ kfp==2.5.0
+ kiwisolver==1.4.5
+ kmapper==2.0.1
+ kmodes==0.12.2
+ korean-lunar-calendar==0.3.1
+ kornia==0.7.2
+ kornia_rs==0.1.3
+ kt-legacy==1.0.5
+ kubernetes==26.1.0
+ langcodes==3.4.0
+ langid==1.1.6
+ language_data==1.2.0
+ lazy_loader==0.3
+ learntools==0.3.4
+ leven==1.0.4
+ libclang==16.0.6
+ libmambapy==1.5.8
+ libpysal==4.9.2
+ librosa==0.10.2.post1
+ lightgbm==4.2.0
+ lightning-utilities==0.11.2
+ lime==0.2.0.1
+ line_profiler==4.1.3
+ linkify-it-py==2.0.3
+ llvmlite==0.41.1
+ llvmlite==0.42.0
+ lml==0.1.0
+ locket==1.0.0
+ loguru==0.7.2
+ lxml==5.2.2
+ lz4==4.3.3
+ mamba==1.5.8
+ mapclassify==2.6.1
+ marisa-trie==1.1.0
+ markdown-it-py==3.0.0
+ marshmallow==3.21.2
+ matplotlib-inline==0.1.6
+ matplotlib-venn==0.11.10
+ matplotlib==3.7.5
+ matplotlib==3.8.4
+ mccabe==0.7.0
+ mdit-py-plugins==0.4.0
+ mdurl==0.1.2
+ memory-profiler==0.61.0
+ menuinst==2.0.1
+ mercantile==1.2.1
+ mgwr==2.2.1
+ missingno==0.5.2
+ mistune==0.8.4
+ mizani==0.11.4
+ ml-dtypes==0.2.0
+ mlcrate==0.2.0
+ mlens==0.2.3
+ mlxtend==0.23.1
+ mne==1.7.0
+ mnist==0.2.2
+ momepy==0.7.0
+ more-itertools==10.2.0
+ mpld3==0.5.10
+ mpmath==1.3.0
+ msgpack==1.0.7
+ msgpack==1.0.8
+ multidict==6.0.4
+ multimethod==1.10
+ multipledispatch==1.0.0
+ multiprocess==0.70.16
+ munkres==1.1.4
+ murmurhash==1.0.10
+ mypy-extensions==1.0.0
+ namex==0.0.8
+ nb-conda-kernels==2.3.1
+ nb_conda==2.2.1
+ nbclassic==1.0.0
+ nbclient==0.5.13
+ nbconvert==6.4.5
+ nbdime==3.2.0
+ nbformat==5.9.2
+ ndindex==1.8
+ nest-asyncio==1.5.8
+ networkx==3.2.1
+ nibabel==5.2.1
+ nilearn==0.10.4
+ ninja==1.11.1.1
+ nltk==3.2.4
+ nose==1.3.7
+ notebook==6.5.4
+ notebook==6.5.6
+ notebook_executor==0.2
+ notebook_shim==0.2.3
+ numba==0.58.1
+ numba==0.59.1
+ numexpr==2.10.0
+ numpy==1.26.4
+ nvidia-ml-py==11.495.46
+ nvtx==0.2.10
+ oauth2client==4.1.3
+ oauthlib==3.2.2
+ objsize==0.6.1
+ odfpy==1.4.1
+ olefile==0.47
+ onnx==1.16.1
+ opencensus-context==0.1.3
+ opencensus==0.11.4
+ opencv-contrib-python==4.9.0.80
+ opencv-python-headless==4.9.0.80
+ opencv-python==4.9.0.80
+ openpyxl==3.1.2
+ openslide-python==1.3.1
+ opentelemetry-api==1.22.0
+ opentelemetry-exporter-otlp-proto-common==1.22.0
+ opentelemetry-exporter-otlp-proto-grpc==1.22.0
+ opentelemetry-exporter-otlp-proto-http==1.22.0
+ opentelemetry-exporter-otlp==1.22.0
+ opentelemetry-proto==1.22.0
+ opentelemetry-sdk==1.22.0
+ opentelemetry-semantic-conventions==0.43b0
+ opt-einsum==3.3.0
+ optax==0.2.2
+ optree==0.11.0
+ optuna==3.6.1
+ orbax-checkpoint==0.5.14
+ ordered-set==4.1.0
+ orjson==3.9.10
+ ortools==9.4.1874
+ osmnx==1.9.3
+ overrides==7.4.0
+ packaging==21.3
+ pandas-datareader==0.10.0
+ pandas-profiling==3.6.6
+ pandas-summary==0.2.0
+ pandas==2.2.1
+ pandas==2.2.2
+ pandasql==0.7.3
+ pandocfilters==1.5.0
+ panel==1.4.3
+ papermill==2.5.0
+ param==2.1.0
+ parso==0.8.3
+ partd==1.4.2
+ path.py==12.5.0
+ path==16.14.0
+ pathos==0.3.2
+ pathy==0.10.3
+ patsy==0.5.6
+ pdf2image==1.17.0
+ peft==0.11.1
+ pettingzoo==1.24.0
+ pexpect==4.8.0
+ pexpect==4.9.0
+ phik==0.12.4
+ pickleshare==0.7.5
+ pillow==10.3.0
+ pip==23.3.2
+ pkgutil_resolve_name==1.3.10
+ platformdirs==4.2.2
+ plotly-express==0.4.1
+ plotly==5.18.0
+ plotnine==0.13.6
+ pluggy==1.5.0
+ pointpats==2.4.0
+ polars==0.20.30
+ polyglot==16.7.4
+ pooch==1.8.1
+ pox==0.3.4
+ ppca==0.0.4
+ ppft==1.7.6.8
+ preprocessing==0.1.13
+ preshed==3.0.9
+ prettytable==3.9.0
+ progressbar2==4.4.2
+ prometheus-client==0.19.0
+ promise==2.3
+ prompt-toolkit==3.0.42
+ prompt-toolkit==3.0.43
+ prophet==1.1.1
+ proto-plus==1.23.0
+ protobuf==3.20.3
+ protobuf==4.24.4
+ psutil==5.9.3
+ psutil==5.9.7
+ ptyprocess==0.7.0
+ pudb==2024.1
+ pure-eval==0.2.2
+ py-cpuinfo==9.0.0
+ py-spy==0.3.14
+ py4j==0.10.9.7
+ pyLDAvis==3.4.1
+ pyOpenSSL==23.3.0
+ pyaml==24.4.0
+ pyarrow-hotfix==0.6
+ pyarrow==14.0.2
+ pyasn1-modules==0.3.0
+ pyasn1==0.5.1
+ pybind11==2.12.0
+ pyclipper==1.3.0.post5
+ pycodestyle==2.11.1
+ pycosat==0.6.6
+ pycparser==2.21
+ pycryptodome==3.20.0
+ pyct==0.5.0
+ pycuda==2024.1
+ pydantic==2.5.3
+ pydantic==2.7.2
+ pydantic_core==2.14.6
+ pydantic_core==2.18.3
+ pydegensac==0.1.2
+ pydicom==2.4.4
+ pydocstyle==6.3.0
+ pydot==1.4.2
+ pydub==0.25.1
+ pyemd==1.0.0
+ pyerfa==2.0.1.4
+ pyexcel-io==0.6.6
+ pyexcel-ods==0.6.0
+ pyflakes==3.2.0
+ pygltflib==1.16.2
+ pykalman==0.9.7
+ pylibraft==24.4.0
+ pylint==3.2.2
+ pymc3==3.11.4
+ pymongo==3.13.0
+ pynndescent==0.5.12
+ pynvjitlink==0.2.3
+ pynvml==11.4.1
+ pynvrtc==9.2
+ pyparsing==3.1.1
+ pyparsing==3.1.2
+ pypdf==4.2.0
+ pyproj==3.6.1
+ pysal==24.1
+ pyshp==2.3.1
+ pytesseract==0.3.10
+ pytest==8.2.1
+ python-bidi==0.4.2
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.0
+ python-json-logger==2.0.7
+ python-louvain==0.16
+ python-lsp-jsonrpc==1.1.2
+ python-lsp-server==1.11.0
+ python-slugify==8.0.4
+ python-utils==3.8.2
+ pythreejs==2.4.2
+ pytoolconfig==1.3.1
+ pytools==2024.1.3
+ pytorch-ignite==0.5.0.post2
+ pytorch-lightning==2.2.5
+ pytz==2023.3.post1
+ pytz==2024.1
+ pyu2f==0.1.5
+ pyviz_comms==3.0.2
+ pyzmq==24.0.1
+ pyzmq==25.1.2
+ qgrid==1.3.1
+ qtconsole==5.5.2
+ quantecon==0.7.2
+ qudida==0.0.4
+ raft-dask==24.4.0
+ rapids-dask-dependency==24.4.1a0
+ rasterio==1.3.10
+ rasterstats==0.19.0
+ ray-cpp==2.9.0
+ ray==2.9.0
+ referencing==0.32.1
+ regex==2023.12.25
+ requests-oauthlib==1.3.1
+ requests-toolbelt==0.10.1
+ requests==2.31.0
+ retrying==1.3.3
+ retrying==1.3.4
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rgf-python==3.12.0
+ rich-click==1.8.2
+ rich==13.7.0
+ rich==13.7.1
+ rmm==24.4.0
+ rope==1.13.0
+ rpds-py==0.16.2
+ rsa==4.9
+ ruamel-yaml-conda==0.15.100
+ ruamel.yaml.clib==0.2.7
+ ruamel.yaml==0.18.5
+ s2sphere==0.2.5
+ s3fs==2024.3.1
+ s3transfer==0.6.2
+ safetensors==0.4.3
+ scattertext==0.1.19
+ scikit-image==0.22.0
+ scikit-learn-intelex==2024.4.0
+ scikit-learn==1.2.2
+ scikit-multilearn==0.2.0
+ scikit-optimize==0.10.1
+ scikit-plot==0.3.7
683
+ scikit-surprise==1.1.4
684
+ scipy==1.11.4
685
+ scipy==1.13.1
686
+ seaborn==0.12.2
687
+ segment_anything==1.0
688
+ segregation==2.5
689
+ semver==3.0.2
690
+ sentencepiece==0.2.0
691
+ sentry-sdk==2.3.1
692
+ setproctitle==1.3.3
693
+ setuptools-git==1.2
694
+ setuptools-scm==8.1.0
695
+ setuptools==69.0.3
696
+ shap==0.44.1
697
+ shapely==2.0.4
698
+ shellingham==1.5.4
699
+ simpervisor==1.0.0
700
+ simplejson==3.19.2
701
+ six==1.16.0
702
+ sklearn-pandas==2.2.0
703
+ slicer==0.0.7
704
+ smart-open==6.4.0
705
+ smmap==5.0.1
706
+ sniffio==1.3.0
707
+ snowballstemmer==2.2.0
708
+ snuggs==1.4.7
709
+ sortedcontainers==2.4.0
710
+ soundfile==0.12.1
711
+ soupsieve==2.5
712
+ soxr==0.3.7
713
+ spacy-legacy==3.0.12
714
+ spacy-loggers==1.0.5
715
+ spacy==3.7.3
716
+ spaghetti==1.7.5.post1
717
+ spectral==0.23.1
718
+ spglm==1.1.0
719
+ sphinx-rtd-theme==0.2.4
720
+ spint==1.0.7
721
+ splot==1.1.5.post1
722
+ spopt==0.6.0
723
+ spreg==1.4.2
724
+ spvcm==0.3.0
725
+ sqlparse==0.4.4
726
+ squarify==0.4.3
727
+ srsly==2.4.8
728
+ stable-baselines3==2.1.0
729
+ stack-data==0.6.2
730
+ stack-data==0.6.3
731
+ stanio==0.5.0
732
+ starlette==0.32.0.post1
733
+ statsmodels==0.14.1
734
+ stemming==1.0.1
735
+ stop-words==2018.7.23
736
+ stopit==1.1.2
737
+ stumpy==1.12.0
738
+ sympy==1.12
739
+ tables==3.9.2
740
+ tabulate==0.9.0
741
+ tangled-up-in-unicode==0.2.0
742
+ tbb==2021.12.0
743
+ tblib==3.0.0
744
+ tenacity==8.2.3
745
+ tensorboard-data-server==0.7.2
746
+ tensorboard-plugin-profile==2.15.0
747
+ tensorboard==2.15.1
748
+ tensorboardX==2.6.2.2
749
+ tensorflow-cloud==0.1.16
750
+ tensorflow-datasets==4.9.4
751
+ tensorflow-decision-forests==1.8.1
752
+ tensorflow-estimator==2.15.0
753
+ tensorflow-hub==0.16.1
754
+ tensorflow-io-gcs-filesystem==0.35.0
755
+ tensorflow-io==0.35.0
756
+ tensorflow-metadata==0.14.0
757
+ tensorflow-probability==0.23.0
758
+ tensorflow-serving-api==2.14.1
759
+ tensorflow-text==2.15.0
760
+ tensorflow-transform==0.14.0
761
+ tensorflow==2.15.0
762
+ tensorstore==0.1.59
763
+ termcolor==2.4.0
764
+ terminado==0.18.0
765
+ testpath==0.6.0
766
+ text-unidecode==1.3
767
+ textblob==0.18.0.post0
768
+ texttable==1.7.0
769
+ tf_keras==2.15.1
770
+ tfp-nightly==0.24.0.dev0
771
+ thinc==8.2.3
772
+ threadpoolctl==3.2.0
773
+ tifffile==2023.12.9
774
+ timm==1.0.3
775
+ tinycss2==1.2.1
776
+ tobler==0.11.2
777
+ tokenizers==0.19.1
778
+ toml==0.10.2
779
+ tomli==2.0.1
780
+ tomlkit==0.12.5
781
+ toolz==0.12.1
782
+ torch==2.1.2
783
+ torchaudio==2.1.2
784
+ torchdata==0.7.1
785
+ torchinfo==1.8.0
786
+ torchmetrics==1.4.0.post0
787
+ torchtext==0.16.2
788
+ torchvision==0.16.2
789
+ tornado==6.3.3
790
+ tqdm==4.66.4
791
+ traceml==1.0.8
792
+ traitlets==5.9.0
793
+ traittypes==0.2.1
794
+ transformers==4.41.1
795
+ treelite==4.1.2
796
+ truststore==0.8.0
797
+ trx-python==0.2.9
798
+ tsfresh==0.20.2
799
+ typeguard==4.1.5
800
+ typer==0.9.0
801
+ typer==0.9.4
802
+ types-python-dateutil==2.8.19.20240106
803
+ typing-inspect==0.9.0
804
+ typing-utils==0.1.0
805
+ typing_extensions==4.9.0
806
+ tzdata==2023.4
807
+ tzdata==2024.1
808
+ uc-micro-py==1.0.3
809
+ ucx-py==0.37.0
810
+ ujson==5.10.0
811
+ umap-learn==0.5.6
812
+ unicodedata2==15.1.0
813
+ update-checker==0.18.0
814
+ uri-template==1.3.0
815
+ uritemplate==3.0.1
816
+ urllib3==1.26.18
817
+ urllib3==2.1.0
818
+ urwid==2.6.12
819
+ urwid_readline==0.14
820
+ uvicorn==0.25.0
821
+ uvloop==0.19.0
822
+ vaex-astro==0.9.3
823
+ vaex-core==4.17.1
824
+ vaex-hdf5==0.14.1
825
+ vaex-jupyter==0.8.2
826
+ vaex-ml==0.18.3
827
+ vaex-server==0.9.0
828
+ vaex-viz==0.5.4
829
+ vaex==4.17.0
830
+ vec_noise==1.1.4
831
+ vecstack==0.4.0
832
+ virtualenv==20.21.0
833
+ visions==0.7.5
834
+ vowpalwabbit==9.9.0
835
+ vtk==9.3.0
836
+ wandb==0.17.0
837
+ wasabi==1.1.2
838
+ watchfiles==0.21.0
839
+ wavio==0.0.9
840
+ wcwidth==0.2.13
841
+ weasel==0.3.4
842
+ webcolors==1.13
843
+ webencodings==0.5.1
844
+ websocket-client==1.7.0
845
+ websockets==12.0
846
+ wfdb==4.1.2
847
+ whatthepatch==1.0.5
848
+ wheel==0.42.0
849
+ widgetsnbextension==3.6.6
850
+ witwidget==1.8.1
851
+ woodwork==0.31.0
852
+ wordcloud==1.9.3
853
+ wordsegment==1.3.1
854
+ wrapt==1.14.1
855
+ xarray-einstats==0.7.0
856
+ xarray==2024.5.0
857
+ xgboost==2.0.3
858
+ xvfbwrapper==0.2.9
859
+ xxhash==3.4.1
860
+ xyzservices==2024.4.0
861
+ y-py==0.6.2
862
+ yapf==0.40.2
863
+ yarl==1.9.3
864
+ yarl==1.9.4
865
+ ydata-profiling==4.6.4
866
+ yellowbrick==1.5
867
+ ypy-websocket==0.8.4
868
+ zict==3.0.0
869
+ zipp==3.17.0
870
+ zstandard==0.19.0
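Aside: the pinned list above is the environment snapshot wandb stores as files/requirements.txt for a run. The exact capture mechanism is internal to wandb, so the following is only a minimal sketch of producing an equivalent snapshot yourself, assuming nothing beyond pip and the standard library:

```python
import subprocess
import sys

# Snapshot the active interpreter's environment as pinned "package==version"
# lines, comparable to the requirements diff above. Illustration only; this
# is not wandb's internal code.
frozen = subprocess.run(
    [sys.executable, "-m", "pip", "freeze"],
    capture_output=True, text=True, check=True,
).stdout

with open("requirements.txt", "w") as fh:
    fh.write(frozen)

print(frozen.splitlines()[:3])  # first few pinned packages
```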
wandb/run-20240603_175449-d191dh7n/files/wandb-metadata.json ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ "os": "Linux-5.15.133+-x86_64-with-glibc2.31",
3
+ "python": "3.10.13",
4
+ "heartbeatAt": "2024-06-03T17:54:49.804208",
5
+ "startedAt": "2024-06-03T17:54:49.031804",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "kaggle.ipynb",
11
+ "codePathLocal": null,
12
+ "root": "/kaggle/working",
13
+ "host": "f28ebe0d2526",
14
+ "username": "root",
15
+ "executable": "/opt/conda/bin/python3.10",
16
+ "cpu_count": 2,
17
+ "cpu_count_logical": 4,
18
+ "cpu_freq": {
19
+ "current": 2000.194,
20
+ "min": 0.0,
21
+ "max": 0.0
22
+ },
23
+ "cpu_freq_per_core": [
24
+ {
25
+ "current": 2000.194,
26
+ "min": 0.0,
27
+ "max": 0.0
28
+ },
29
+ {
30
+ "current": 2000.194,
31
+ "min": 0.0,
32
+ "max": 0.0
33
+ },
34
+ {
35
+ "current": 2000.194,
36
+ "min": 0.0,
37
+ "max": 0.0
38
+ },
39
+ {
40
+ "current": 2000.194,
41
+ "min": 0.0,
42
+ "max": 0.0
43
+ }
44
+ ],
45
+ "disk": {
46
+ "/": {
47
+ "total": 8062.387607574463,
48
+ "used": 5657.45686340332
49
+ }
50
+ },
51
+ "gpu": "Tesla T4",
52
+ "gpu_count": 2,
53
+ "gpu_devices": [
54
+ {
55
+ "name": "Tesla T4",
56
+ "memory_total": 16106127360
57
+ },
58
+ {
59
+ "name": "Tesla T4",
60
+ "memory_total": 16106127360
61
+ }
62
+ ],
63
+ "memory": {
64
+ "total": 31.357563018798828
65
+ }
66
+ }
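The host facts recorded above (2 physical / 4 logical CPUs at ~2000 MHz, ~31.36 GiB RAM, two 16 GiB Tesla T4s) can be cross-checked without wandb. A minimal sketch using psutil and pynvml, both pinned in the requirements list above; this illustrates where the numbers come from and is not wandb's own metadata collector:

```python
import psutil   # pinned above (psutil==5.9.7)
import pynvml   # pinned above (pynvml==11.4.1)

# Reproduce a few wandb-metadata.json fields by hand. Values in the comments
# are the ones recorded above for this Kaggle VM.
meta = {
    "cpu_count": psutil.cpu_count(logical=False),         # 2
    "cpu_count_logical": psutil.cpu_count(logical=True),  # 4
    "cpu_freq_mhz": psutil.cpu_freq().current,            # ~2000.194
    "memory_total_gib": psutil.virtual_memory().total / 2**30,  # ~31.36
}
print(meta)

# GPU inventory via NVML, matching "gpu_count" and "gpu_devices" above.
pynvml.nvmlInit()
for i in range(pynvml.nvmlDeviceGetCount()):               # 2 on this host
    handle = pynvml.nvmlDeviceGetHandleByIndex(i)
    name = pynvml.nvmlDeviceGetName(handle)                # Tesla T4
    total = pynvml.nvmlDeviceGetMemoryInfo(handle).total   # 16106127360 bytes
    print(name, total)
pynvml.nvmlShutdown()
```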
wandb/run-20240603_175449-d191dh7n/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/loss": 0.6481, "train/grad_norm": 5.41937255859375, "train/learning_rate": 0.00019090909090909092, "train/epoch": 0.9090909090909091, "train/global_step": 10, "_timestamp": 1717448035.9301188, "_runtime": 10746.891048908234, "_step": 28, "train_runtime": 4190.3533, "train_samples_per_second": 0.196, "train_steps_per_second": 0.053, "total_flos": 4.4695391805696e+16, "train_loss": 0.8953418861735951}
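For context, this summary holds the last value of each metric the run logged: train/loss at global step 10, the linear-schedule learning rate, the final train_loss, plus run timing. A minimal sketch of how such a summary arises with the wandb SDK (0.17.0 per the debug log below); the project name is hypothetical and this is not the training notebook:

```python
import wandb

# Assumes `wandb login` has already been run. Each log() call appends a
# history row; the run summary keeps the latest value per key, plus any
# values assigned explicitly.
run = wandb.init(project="fsttModel-demo")  # hypothetical project name
for step in range(1, 11):
    run.log({"train/loss": 1.0 / step, "train/global_step": step})
run.summary["train_runtime"] = 4190.3533  # value copied from the summary above
run.finish()
```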
wandb/run-20240603_175449-d191dh7n/logs/debug-internal.log ADDED
The diff for this file is too large to render.
wandb/run-20240603_175449-d191dh7n/logs/debug.log ADDED
@@ -0,0 +1,172 @@
1
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Current SDK version is 0.17.0
2
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Configure stats pid to 34
3
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from /kaggle/working/wandb/settings
5
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying login settings: {}
9
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
10
+ 2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_init.py:_log_setup():520] Logging user logs to /kaggle/working/wandb/run-20240603_175449-d191dh7n/logs/debug.log
11
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:_log_setup():521] Logging internal logs to /kaggle/working/wandb/run-20240603_175449-d191dh7n/logs/debug-internal.log
12
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:_jupyter_setup():466] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x78eae9ee9ab0>
13
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():560] calling init triggers
14
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():567] wandb.init called with sweep_config: {}
15
+ config: {}
16
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():610] starting backend
17
+ 2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():614] setting up manager
18
+ 2024-06-03 17:54:49,036 INFO MainThread:34 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
19
+ 2024-06-03 17:54:49,038 INFO MainThread:34 [wandb_init.py:init():622] backend started and connected
20
+ 2024-06-03 17:54:49,052 INFO MainThread:34 [wandb_run.py:_label_probe_notebook():1328] probe notebook
21
+ 2024-06-03 17:54:49,382 INFO MainThread:34 [wandb_init.py:init():711] updated telemetry
22
+ 2024-06-03 17:54:49,386 INFO MainThread:34 [wandb_init.py:init():744] communicating run to backend with 90.0 second timeout
23
+ 2024-06-03 17:54:49,688 INFO MainThread:34 [wandb_run.py:_on_init():2396] communicating current version
24
+ 2024-06-03 17:54:49,771 INFO MainThread:34 [wandb_run.py:_on_init():2405] got version response
25
+ 2024-06-03 17:54:49,772 INFO MainThread:34 [wandb_init.py:init():795] starting run threads in backend
26
+ 2024-06-03 17:55:06,077 INFO MainThread:34 [wandb_run.py:_console_start():2374] atexit reg
27
+ 2024-06-03 17:55:06,077 INFO MainThread:34 [wandb_run.py:_redirect():2229] redirect: wrap_raw
28
+ 2024-06-03 17:55:06,078 INFO MainThread:34 [wandb_run.py:_redirect():2294] Wrapping output streams.
29
+ 2024-06-03 17:55:06,078 INFO MainThread:34 [wandb_run.py:_redirect():2319] Redirects installed.
30
+ 2024-06-03 17:55:06,081 INFO MainThread:34 [wandb_init.py:init():838] run started, returning control to user process
31
+ 2024-06-03 17:55:06,087 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 
'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_17-40-11_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
32
+ 2024-06-03 17:55:07,353 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
33
+ 2024-06-03 17:55:07,353 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
34
+ 2024-06-03 17:56:56,275 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
35
+ 2024-06-03 17:56:56,290 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
36
+ 2024-06-03 17:56:56,290 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
37
+ 2024-06-03 17:56:59,514 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
38
+ 2024-06-03 17:56:59,595 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
39
+ 2024-06-03 17:56:59,595 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
40
+ 2024-06-03 17:57:06,214 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
41
+ 2024-06-03 17:57:06,261 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
42
+ 2024-06-03 17:57:06,261 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
43
+ 2024-06-03 18:01:57,364 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
44
+ 2024-06-03 18:01:57,366 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
+ 2024-06-03 18:01:57,366 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
46
+ 2024-06-03 18:02:16,908 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
47
+ 2024-06-03 18:02:16,951 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2024-06-03 18:02:16,952 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
49
+ 2024-06-03 18:02:46,250 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
50
+ 2024-06-03 18:02:46,252 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
51
+ 2024-06-03 18:02:46,252 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
52
+ 2024-06-03 18:03:47,943 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
53
+ 2024-06-03 18:03:48,029 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
54
+ 2024-06-03 18:03:48,029 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
55
+ 2024-06-03 18:04:13,706 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
56
+ 2024-06-03 18:04:13,759 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
57
+ 2024-06-03 18:04:13,759 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
58
+ 2024-06-03 18:04:26,491 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
59
+ 2024-06-03 18:04:26,697 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
60
+ 2024-06-03 18:04:26,697 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
61
+ 2024-06-03 18:04:34,326 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
62
+ 2024-06-03 18:04:35,570 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 
'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_17-40-11_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
63
+ 2024-06-03 18:18:50,784 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
64
+ 2024-06-03 18:18:50,784 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
65
+ 2024-06-03 18:41:05,951 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
66
+ 2024-06-03 18:41:05,953 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
67
+ 2024-06-03 18:41:05,953 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
68
+ 2024-06-03 18:45:28,892 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
69
+ 2024-06-03 18:45:28,927 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
70
+ 2024-06-03 18:45:28,927 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
71
+ 2024-06-03 18:45:30,228 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
72
+ 2024-06-03 18:45:30,229 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
73
+ 2024-06-03 18:45:30,229 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
74
+ 2024-06-03 18:45:31,254 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
75
+ 2024-06-03 18:45:31,276 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
76
+ 2024-06-03 18:45:31,276 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
77
+ 2024-06-03 18:45:33,122 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
78
+ 2024-06-03 18:45:33,358 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
79
+ 2024-06-03 18:45:33,358 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
80
+ 2024-06-03 18:45:36,415 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
81
+ 2024-06-03 18:45:37,683 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 20, 'max_steps': -1, 'lr_scheduler_type': 'linear', 
'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_18-45-28_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
82
+ 2024-06-03 19:55:43,601 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
83
+ 2024-06-03 19:55:43,602 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
84
+ 2024-06-03 19:56:53,516 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
85
+ 2024-06-03 19:56:55,309 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
86
+ 2024-06-03 19:56:55,309 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
87
+ 2024-06-03 20:02:21,391 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
88
+ 2024-06-03 20:02:22,164 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
89
+ 2024-06-03 20:02:22,164 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
90
+ 2024-06-03 20:03:12,802 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
91
+ 2024-06-03 20:03:12,827 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
92
+ 2024-06-03 20:03:12,827 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
93
+ 2024-06-03 20:03:22,908 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
94
+ 2024-06-03 20:03:23,545 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
95
+ 2024-06-03 20:03:23,546 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
96
+ 2024-06-03 20:04:16,404 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
97
+ 2024-06-03 20:04:16,447 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
98
+ 2024-06-03 20:04:16,447 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
99
+ 2024-06-03 20:04:32,978 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
100
+ 2024-06-03 20:04:33,028 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
101
+ 2024-06-03 20:04:33,028 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
102
+ 2024-06-03 20:05:18,072 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
103
+ 2024-06-03 20:05:18,118 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
104
+ 2024-06-03 20:05:18,118 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
105
+ 2024-06-03 20:05:31,531 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
106
+ 2024-06-03 20:05:31,580 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
107
+ 2024-06-03 20:05:31,580 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
108
+ 2024-06-03 20:05:44,101 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
109
+ 2024-06-03 20:05:44,780 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
110
+ 2024-06-03 20:05:44,780 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
111
+ 2024-06-03 20:06:37,084 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
112
+ 2024-06-03 20:06:37,830 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
113
+ 2024-06-03 20:06:37,830 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
114
+ 2024-06-03 20:08:59,975 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
115
+ 2024-06-03 20:09:00,010 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
116
+ 2024-06-03 20:09:00,010 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
117
+ 2024-06-03 20:09:06,499 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
118
+ 2024-06-03 20:09:06,500 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
119
+ 2024-06-03 20:09:06,500 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
120
+ 2024-06-03 20:09:07,197 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
121
+ 2024-06-03 20:09:07,218 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
122
+ 2024-06-03 20:09:07,218 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
123
+ 2024-06-03 20:09:18,369 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
124
+ 2024-06-03 20:09:19,119 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
125
+ 2024-06-03 20:09:19,120 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
126
+ 2024-06-03 20:11:52,561 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
127
+ 2024-06-03 20:11:54,589 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
128
+ 2024-06-03 20:11:54,589 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
129
+ 2024-06-03 20:16:50,594 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
130
+ 2024-06-03 20:19:04,529 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
131
+ 2024-06-03 20:19:04,530 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
132
+ 2024-06-03 20:20:33,194 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
133
+ 2024-06-03 20:20:33,197 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
134
+ 2024-06-03 20:20:33,197 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
135
+ 2024-06-03 20:20:53,790 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
136
+ 2024-06-03 20:24:20,236 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
137
+ 2024-06-03 20:24:20,236 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
138
+ 2024-06-03 20:32:45,840 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
139
+ 2024-06-03 20:32:45,841 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
140
+ 2024-06-03 20:32:45,841 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
141
+ 2024-06-03 20:34:19,718 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
142
+ 2024-06-03 20:34:19,722 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
143
+ 2024-06-03 20:34:19,722 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
144
+ 2024-06-03 20:34:21,601 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
145
+ 2024-06-03 20:34:21,602 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
146
+ 2024-06-03 20:34:21,602 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
147
+ 2024-06-03 20:34:23,187 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
148
+ 2024-06-03 20:37:48,397 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
149
+ 2024-06-03 20:37:48,397 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
150
+ 2024-06-03 20:38:33,502 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
151
+ 2024-06-03 20:41:58,862 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
152
+ 2024-06-03 20:41:58,862 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
153
+ 2024-06-03 20:43:51,168 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
154
+ 2024-06-03 20:43:51,171 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
155
+ 2024-06-03 20:43:51,171 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
156
+ 2024-06-03 20:43:53,895 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
157
+ 2024-06-03 20:47:25,895 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
158
+ 2024-06-03 20:47:25,895 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
159
+ 2024-06-03 20:50:07,262 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
160
+ 2024-06-03 20:50:07,303 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
161
+ 2024-06-03 20:50:07,303 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
162
+ 2024-06-03 20:50:09,915 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
163
+ 2024-06-03 20:50:09,916 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
164
+ 2024-06-03 20:50:09,917 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
165
+ 2024-06-03 20:50:10,463 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
166
+ 2024-06-03 20:50:10,484 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
167
+ 2024-06-03 20:50:10,484 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
168
+ 2024-06-03 20:50:13,975 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
169
+ 2024-06-03 20:50:14,119 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
170
+ 2024-06-03 20:50:14,119 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
171
+ 2024-06-03 20:50:15,412 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
172
+ 2024-06-03 20:50:16,872 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 20, 'max_steps': -1, 'lr_scheduler_type': 'linear', 
'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_20-50-07_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
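The config_cb entries above capture the run's full effective configuration. The training notebook itself is not in this repo, but a minimal sketch of quantization and trainer settings matching the recorded values (4-bit NF4 with bfloat16 compute; lr 2e-4, per-device batch size 8, 20 epochs, fp16, epoch checkpoints pushed to othmanfa/fsttModel) would look like this:

```python
import torch
from transformers import BitsAndBytesConfig, TrainingArguments

# Matches the quantization_config recorded in the log above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Matches the TrainingArguments fields recorded in the final config_cb entry.
args = TrainingArguments(
    output_dir="/kaggle/working/",
    per_device_train_batch_size=8,
    learning_rate=2e-4,
    num_train_epochs=20,
    lr_scheduler_type="linear",
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=4,
    auto_find_batch_size=True,
    push_to_hub=True,
    hub_model_id="othmanfa/fsttModel",
    report_to=["tensorboard", "wandb"],
    seed=42,
)
```

Every value here is read off the log; fields omitted from the sketch keep their transformers 4.41.1 defaults.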
wandb/run-20240603_175449-d191dh7n/run-d191dh7n.wandb ADDED
Binary file (278 kB).