Training in progress, epoch 1
Browse files
- README.md +55 -0
- adapter_config.json +28 -0
- adapter_model.safetensors +3 -0
- runs/Jun03_17-40-11_f28ebe0d2526/events.out.tfevents.1717436754.f28ebe0d2526.34.0 +3 -0
- runs/Jun03_17-40-11_f28ebe0d2526/events.out.tfevents.1717437875.f28ebe0d2526.34.1 +3 -0
- runs/Jun03_18-45-28_f28ebe0d2526/events.out.tfevents.1717440337.f28ebe0d2526.34.2 +3 -0
- runs/Jun03_20-50-07_f28ebe0d2526/events.out.tfevents.1717447816.f28ebe0d2526.34.3 +3 -0
- training_args.bin +3 -0
- wandb/debug-internal.log +0 -0
- wandb/debug.log +172 -0
- wandb/run-20240603_175449-d191dh7n/files/conda-environment.yaml +0 -0
- wandb/run-20240603_175449-d191dh7n/files/config.yaml +710 -0
- wandb/run-20240603_175449-d191dh7n/files/output.log +223 -0
- wandb/run-20240603_175449-d191dh7n/files/requirements.txt +870 -0
- wandb/run-20240603_175449-d191dh7n/files/wandb-metadata.json +66 -0
- wandb/run-20240603_175449-d191dh7n/files/wandb-summary.json +1 -0
- wandb/run-20240603_175449-d191dh7n/logs/debug-internal.log +0 -0
- wandb/run-20240603_175449-d191dh7n/logs/debug.log +172 -0
- wandb/run-20240603_175449-d191dh7n/run-d191dh7n.wandb +0 -0
README.md
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
library_name: peft
|
4 |
+
tags:
|
5 |
+
- generated_from_trainer
|
6 |
+
base_model: tiiuae/falcon-7b
|
7 |
+
model-index:
|
8 |
+
- name: fsttModel
|
9 |
+
results: []
|
10 |
+
---
|
11 |
+
|
12 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
+
should probably proofread and complete it, then remove this comment. -->
|
14 |
+
|
15 |
+
# fsttModel
|
16 |
+
|
17 |
+
This model is a fine-tuned version of [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b) on an unknown dataset.
|
18 |
+
|
19 |
+
## Model description
|
20 |
+
|
21 |
+
More information needed
|
22 |
+
|
23 |
+
## Intended uses & limitations
|
24 |
+
|
25 |
+
More information needed
|
26 |
+
|
27 |
+
## Training and evaluation data
|
28 |
+
|
29 |
+
More information needed
|
30 |
+
|
31 |
+
## Training procedure
|
32 |
+
|
33 |
+
### Training hyperparameters
|
34 |
+
|
35 |
+
The following hyperparameters were used during training:
|
36 |
+
- learning_rate: 0.0002
|
37 |
+
- train_batch_size: 8
|
38 |
+
- eval_batch_size: 8
|
39 |
+
- seed: 42
|
40 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
41 |
+
- lr_scheduler_type: linear
|
42 |
+
- num_epochs: 20
|
43 |
+
- mixed_precision_training: Native AMP
|
44 |
+
|
45 |
+
### Training results
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
### Framework versions
|
50 |
+
|
51 |
+
- PEFT 0.11.1
|
52 |
+
- Transformers 4.41.1
|
53 |
+
- Pytorch 2.1.2
|
54 |
+
- Datasets 2.19.1
|
55 |
+
- Tokenizers 0.19.1
|
adapter_config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "tiiuae/falcon-7b",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 32,
|
14 |
+
"lora_dropout": 0.05,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 16,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": [
|
23 |
+
"query_key_value"
|
24 |
+
],
|
25 |
+
"task_type": "CAUSAL_LM",
|
26 |
+
"use_dora": false,
|
27 |
+
"use_rslora": false
|
28 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f726b0e60d373dc414e88603a56d7102d9585bb9bf270aab3bdfbc620d0619f
|
3 |
+
size 18883912
|
runs/Jun03_17-40-11_f28ebe0d2526/events.out.tfevents.1717436754.f28ebe0d2526.34.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f8e80ad4a151a5a3a320d4c416b015d3ce136e2d55f2b53290fda909dac8f1e
|
3 |
+
size 5880
|
runs/Jun03_17-40-11_f28ebe0d2526/events.out.tfevents.1717437875.f28ebe0d2526.34.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2cebdd6b17cca36ad5ad0a524ca4de1a6e7cd55bed6b3302b0b4e9f88e64053
|
3 |
+
size 12848
|
runs/Jun03_18-45-28_f28ebe0d2526/events.out.tfevents.1717440337.f28ebe0d2526.34.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a38e602952c3fd87a80023fa875b8f1bdbd3892742969ac75114bf744eb4862a
|
3 |
+
size 16622
|
runs/Jun03_20-50-07_f28ebe0d2526/events.out.tfevents.1717447816.f28ebe0d2526.34.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbd365d0298ce470a44767777c8e3e9e4884d57e8e7c53a68dc5437e64da3d58
|
3 |
+
size 11881
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9615d3520f03a04c2571da0c900c8a98c6874a896688842d49c8bf3b1731df87
|
3 |
+
size 5176
|
wandb/debug-internal.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
wandb/debug.log
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Current SDK version is 0.17.0
|
2 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Configure stats pid to 34
|
3 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
|
4 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from /kaggle/working/wandb/settings
|
5 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
|
7 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
|
8 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying login settings: {}
|
9 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
|
10 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_init.py:_log_setup():520] Logging user logs to /kaggle/working/wandb/run-20240603_175449-d191dh7n/logs/debug.log
|
11 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:_log_setup():521] Logging internal logs to /kaggle/working/wandb/run-20240603_175449-d191dh7n/logs/debug-internal.log
|
12 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:_jupyter_setup():466] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x78eae9ee9ab0>
|
13 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():560] calling init triggers
|
14 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():567] wandb.init called with sweep_config: {}
|
15 |
+
config: {}
|
16 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():610] starting backend
|
17 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():614] setting up manager
|
18 |
+
2024-06-03 17:54:49,036 INFO MainThread:34 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
19 |
+
2024-06-03 17:54:49,038 INFO MainThread:34 [wandb_init.py:init():622] backend started and connected
|
20 |
+
2024-06-03 17:54:49,052 INFO MainThread:34 [wandb_run.py:_label_probe_notebook():1328] probe notebook
|
21 |
+
2024-06-03 17:54:49,382 INFO MainThread:34 [wandb_init.py:init():711] updated telemetry
|
22 |
+
2024-06-03 17:54:49,386 INFO MainThread:34 [wandb_init.py:init():744] communicating run to backend with 90.0 second timeout
|
23 |
+
2024-06-03 17:54:49,688 INFO MainThread:34 [wandb_run.py:_on_init():2396] communicating current version
|
24 |
+
2024-06-03 17:54:49,771 INFO MainThread:34 [wandb_run.py:_on_init():2405] got version response
|
25 |
+
2024-06-03 17:54:49,772 INFO MainThread:34 [wandb_init.py:init():795] starting run threads in backend
|
26 |
+
2024-06-03 17:55:06,077 INFO MainThread:34 [wandb_run.py:_console_start():2374] atexit reg
|
27 |
+
2024-06-03 17:55:06,077 INFO MainThread:34 [wandb_run.py:_redirect():2229] redirect: wrap_raw
|
28 |
+
2024-06-03 17:55:06,078 INFO MainThread:34 [wandb_run.py:_redirect():2294] Wrapping output streams.
|
29 |
+
2024-06-03 17:55:06,078 INFO MainThread:34 [wandb_run.py:_redirect():2319] Redirects installed.
|
30 |
+
2024-06-03 17:55:06,081 INFO MainThread:34 [wandb_init.py:init():838] run started, returning control to user process
|
31 |
+
2024-06-03 17:55:06,087 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 
'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_17-40-11_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 
'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 
'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
|
32 |
+
2024-06-03 17:55:07,353 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
33 |
+
2024-06-03 17:55:07,353 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
34 |
+
2024-06-03 17:56:56,275 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
35 |
+
2024-06-03 17:56:56,290 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
36 |
+
2024-06-03 17:56:56,290 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
37 |
+
2024-06-03 17:56:59,514 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
38 |
+
2024-06-03 17:56:59,595 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
39 |
+
2024-06-03 17:56:59,595 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
40 |
+
2024-06-03 17:57:06,214 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
41 |
+
2024-06-03 17:57:06,261 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
42 |
+
2024-06-03 17:57:06,261 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
43 |
+
2024-06-03 18:01:57,364 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
44 |
+
2024-06-03 18:01:57,366 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
45 |
+
2024-06-03 18:01:57,366 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
46 |
+
2024-06-03 18:02:16,908 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
47 |
+
2024-06-03 18:02:16,951 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
48 |
+
2024-06-03 18:02:16,952 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
49 |
+
2024-06-03 18:02:46,250 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
50 |
+
2024-06-03 18:02:46,252 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
51 |
+
2024-06-03 18:02:46,252 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
52 |
+
2024-06-03 18:03:47,943 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
53 |
+
2024-06-03 18:03:48,029 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
54 |
+
2024-06-03 18:03:48,029 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
55 |
+
2024-06-03 18:04:13,706 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
56 |
+
2024-06-03 18:04:13,759 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
57 |
+
2024-06-03 18:04:13,759 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
58 |
+
2024-06-03 18:04:26,491 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
59 |
+
2024-06-03 18:04:26,697 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
60 |
+
2024-06-03 18:04:26,697 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
61 |
+
2024-06-03 18:04:34,326 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
62 |
+
2024-06-03 18:04:35,570 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 
'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_17-40-11_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 
'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 
'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
|
63 |
+
2024-06-03 18:18:50,784 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
64 |
+
2024-06-03 18:18:50,784 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
65 |
+
2024-06-03 18:41:05,951 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
66 |
+
2024-06-03 18:41:05,953 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
67 |
+
2024-06-03 18:41:05,953 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
68 |
+
2024-06-03 18:45:28,892 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
69 |
+
2024-06-03 18:45:28,927 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
70 |
+
2024-06-03 18:45:28,927 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
71 |
+
2024-06-03 18:45:30,228 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
72 |
+
2024-06-03 18:45:30,229 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
73 |
+
2024-06-03 18:45:30,229 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
74 |
+
2024-06-03 18:45:31,254 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
75 |
+
2024-06-03 18:45:31,276 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
76 |
+
2024-06-03 18:45:31,276 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
77 |
+
2024-06-03 18:45:33,122 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
78 |
+
2024-06-03 18:45:33,358 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
79 |
+
2024-06-03 18:45:33,358 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
80 |
+
2024-06-03 18:45:36,415 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
81 |
+
2024-06-03 18:45:37,683 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 
'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 20, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_18-45-28_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 
'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 
'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
|
82 |
+
2024-06-03 19:55:43,601 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
83 |
+
2024-06-03 19:55:43,602 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
84 |
+
2024-06-03 19:56:53,516 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
85 |
+
2024-06-03 19:56:55,309 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
86 |
+
2024-06-03 19:56:55,309 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
87 |
+
2024-06-03 20:02:21,391 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
88 |
+
2024-06-03 20:02:22,164 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
89 |
+
2024-06-03 20:02:22,164 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
90 |
+
2024-06-03 20:03:12,802 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
91 |
+
2024-06-03 20:03:12,827 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
92 |
+
2024-06-03 20:03:12,827 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
93 |
+
2024-06-03 20:03:22,908 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
94 |
+
2024-06-03 20:03:23,545 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
95 |
+
2024-06-03 20:03:23,546 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
96 |
+
2024-06-03 20:04:16,404 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
97 |
+
2024-06-03 20:04:16,447 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
98 |
+
2024-06-03 20:04:16,447 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
99 |
+
2024-06-03 20:04:32,978 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
100 |
+
2024-06-03 20:04:33,028 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
101 |
+
2024-06-03 20:04:33,028 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
102 |
+
2024-06-03 20:05:18,072 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
103 |
+
2024-06-03 20:05:18,118 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
104 |
+
2024-06-03 20:05:18,118 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
105 |
+
2024-06-03 20:05:31,531 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
106 |
+
2024-06-03 20:05:31,580 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
107 |
+
2024-06-03 20:05:31,580 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
108 |
+
2024-06-03 20:05:44,101 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
109 |
+
2024-06-03 20:05:44,780 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
110 |
+
2024-06-03 20:05:44,780 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
111 |
+
2024-06-03 20:06:37,084 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
112 |
+
2024-06-03 20:06:37,830 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
113 |
+
2024-06-03 20:06:37,830 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
114 |
+
2024-06-03 20:08:59,975 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
115 |
+
2024-06-03 20:09:00,010 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
116 |
+
2024-06-03 20:09:00,010 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
117 |
+
2024-06-03 20:09:06,499 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
118 |
+
2024-06-03 20:09:06,500 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
119 |
+
2024-06-03 20:09:06,500 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
120 |
+
2024-06-03 20:09:07,197 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
121 |
+
2024-06-03 20:09:07,218 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
122 |
+
2024-06-03 20:09:07,218 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
123 |
+
2024-06-03 20:09:18,369 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
124 |
+
2024-06-03 20:09:19,119 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
125 |
+
2024-06-03 20:09:19,120 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
126 |
+
2024-06-03 20:11:52,561 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
127 |
+
2024-06-03 20:11:54,589 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
128 |
+
2024-06-03 20:11:54,589 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
129 |
+
2024-06-03 20:16:50,594 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
130 |
+
2024-06-03 20:19:04,529 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
131 |
+
2024-06-03 20:19:04,530 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
132 |
+
2024-06-03 20:20:33,194 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
133 |
+
2024-06-03 20:20:33,197 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
134 |
+
2024-06-03 20:20:33,197 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
135 |
+
2024-06-03 20:20:53,790 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
136 |
+
2024-06-03 20:24:20,236 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
137 |
+
2024-06-03 20:24:20,236 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
138 |
+
2024-06-03 20:32:45,840 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
139 |
+
2024-06-03 20:32:45,841 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
140 |
+
2024-06-03 20:32:45,841 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
141 |
+
2024-06-03 20:34:19,718 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
142 |
+
2024-06-03 20:34:19,722 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
143 |
+
2024-06-03 20:34:19,722 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
144 |
+
2024-06-03 20:34:21,601 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
145 |
+
2024-06-03 20:34:21,602 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
146 |
+
2024-06-03 20:34:21,602 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
147 |
+
2024-06-03 20:34:23,187 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
148 |
+
2024-06-03 20:37:48,397 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
149 |
+
2024-06-03 20:37:48,397 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
150 |
+
2024-06-03 20:38:33,502 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
151 |
+
2024-06-03 20:41:58,862 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
152 |
+
2024-06-03 20:41:58,862 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
153 |
+
2024-06-03 20:43:51,168 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
154 |
+
2024-06-03 20:43:51,171 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
155 |
+
2024-06-03 20:43:51,171 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
156 |
+
2024-06-03 20:43:53,895 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
157 |
+
2024-06-03 20:47:25,895 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
158 |
+
2024-06-03 20:47:25,895 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
159 |
+
2024-06-03 20:50:07,262 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
160 |
+
2024-06-03 20:50:07,303 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
161 |
+
2024-06-03 20:50:07,303 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
162 |
+
2024-06-03 20:50:09,915 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
163 |
+
2024-06-03 20:50:09,916 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
164 |
+
2024-06-03 20:50:09,917 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
165 |
+
2024-06-03 20:50:10,463 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
166 |
+
2024-06-03 20:50:10,484 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
167 |
+
2024-06-03 20:50:10,484 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
168 |
+
2024-06-03 20:50:13,975 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
169 |
+
2024-06-03 20:50:14,119 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
170 |
+
2024-06-03 20:50:14,119 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
171 |
+
2024-06-03 20:50:15,412 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
172 |
+
2024-06-03 20:50:16,872 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 
'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 20, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_20-50-07_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 
'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 
'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
|
wandb/run-20240603_175449-d191dh7n/files/conda-environment.yaml
ADDED
File without changes
|
wandb/run-20240603_175449-d191dh7n/files/config.yaml
ADDED
@@ -0,0 +1,710 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
python_version: 3.10.13
|
7 |
+
cli_version: 0.17.0
|
8 |
+
framework: huggingface
|
9 |
+
huggingface_version: 4.41.1
|
10 |
+
is_jupyter_run: true
|
11 |
+
is_kaggle_kernel: true
|
12 |
+
start_time: 1717437289
|
13 |
+
t:
|
14 |
+
1:
|
15 |
+
- 1
|
16 |
+
- 2
|
17 |
+
- 3
|
18 |
+
- 5
|
19 |
+
- 11
|
20 |
+
- 12
|
21 |
+
- 49
|
22 |
+
- 51
|
23 |
+
- 53
|
24 |
+
- 55
|
25 |
+
- 71
|
26 |
+
- 98
|
27 |
+
- 105
|
28 |
+
2:
|
29 |
+
- 1
|
30 |
+
- 2
|
31 |
+
- 3
|
32 |
+
- 5
|
33 |
+
- 11
|
34 |
+
- 12
|
35 |
+
- 49
|
36 |
+
- 51
|
37 |
+
- 53
|
38 |
+
- 55
|
39 |
+
- 71
|
40 |
+
- 98
|
41 |
+
- 105
|
42 |
+
3:
|
43 |
+
- 7
|
44 |
+
- 13
|
45 |
+
- 23
|
46 |
+
- 62
|
47 |
+
- 66
|
48 |
+
4: 3.10.13
|
49 |
+
5: 0.17.0
|
50 |
+
6: 4.41.1
|
51 |
+
8:
|
52 |
+
- 1
|
53 |
+
- 2
|
54 |
+
- 5
|
55 |
+
9:
|
56 |
+
1: transformers_trainer
|
57 |
+
13: linux-x86_64
|
58 |
+
m:
|
59 |
+
- 1: train/global_step
|
60 |
+
6:
|
61 |
+
- 3
|
62 |
+
- 1: train/loss
|
63 |
+
5: 1
|
64 |
+
6:
|
65 |
+
- 1
|
66 |
+
- 1: train/grad_norm
|
67 |
+
5: 1
|
68 |
+
6:
|
69 |
+
- 1
|
70 |
+
- 1: train/learning_rate
|
71 |
+
5: 1
|
72 |
+
6:
|
73 |
+
- 1
|
74 |
+
- 1: train/epoch
|
75 |
+
5: 1
|
76 |
+
6:
|
77 |
+
- 1
|
78 |
+
vocab_size:
|
79 |
+
desc: null
|
80 |
+
value: 65024
|
81 |
+
hidden_size:
|
82 |
+
desc: null
|
83 |
+
value: 4544
|
84 |
+
num_hidden_layers:
|
85 |
+
desc: null
|
86 |
+
value: 32
|
87 |
+
num_attention_heads:
|
88 |
+
desc: null
|
89 |
+
value: 71
|
90 |
+
layer_norm_epsilon:
|
91 |
+
desc: null
|
92 |
+
value: 1.0e-05
|
93 |
+
initializer_range:
|
94 |
+
desc: null
|
95 |
+
value: 0.02
|
96 |
+
use_cache:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
hidden_dropout:
|
100 |
+
desc: null
|
101 |
+
value: 0.0
|
102 |
+
attention_dropout:
|
103 |
+
desc: null
|
104 |
+
value: 0.0
|
105 |
+
bos_token_id:
|
106 |
+
desc: null
|
107 |
+
value: 11
|
108 |
+
eos_token_id:
|
109 |
+
desc: null
|
110 |
+
value: 11
|
111 |
+
num_kv_heads:
|
112 |
+
desc: null
|
113 |
+
value: 71
|
114 |
+
alibi:
|
115 |
+
desc: null
|
116 |
+
value: false
|
117 |
+
new_decoder_architecture:
|
118 |
+
desc: null
|
119 |
+
value: false
|
120 |
+
multi_query:
|
121 |
+
desc: null
|
122 |
+
value: true
|
123 |
+
parallel_attn:
|
124 |
+
desc: null
|
125 |
+
value: true
|
126 |
+
bias:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
return_dict:
|
130 |
+
desc: null
|
131 |
+
value: true
|
132 |
+
output_hidden_states:
|
133 |
+
desc: null
|
134 |
+
value: false
|
135 |
+
output_attentions:
|
136 |
+
desc: null
|
137 |
+
value: false
|
138 |
+
torchscript:
|
139 |
+
desc: null
|
140 |
+
value: false
|
141 |
+
torch_dtype:
|
142 |
+
desc: null
|
143 |
+
value: bfloat16
|
144 |
+
use_bfloat16:
|
145 |
+
desc: null
|
146 |
+
value: false
|
147 |
+
tf_legacy_loss:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
pruned_heads:
|
151 |
+
desc: null
|
152 |
+
value: {}
|
153 |
+
tie_word_embeddings:
|
154 |
+
desc: null
|
155 |
+
value: true
|
156 |
+
chunk_size_feed_forward:
|
157 |
+
desc: null
|
158 |
+
value: 0
|
159 |
+
is_encoder_decoder:
|
160 |
+
desc: null
|
161 |
+
value: false
|
162 |
+
is_decoder:
|
163 |
+
desc: null
|
164 |
+
value: false
|
165 |
+
cross_attention_hidden_size:
|
166 |
+
desc: null
|
167 |
+
value: null
|
168 |
+
add_cross_attention:
|
169 |
+
desc: null
|
170 |
+
value: false
|
171 |
+
tie_encoder_decoder:
|
172 |
+
desc: null
|
173 |
+
value: false
|
174 |
+
max_length:
|
175 |
+
desc: null
|
176 |
+
value: 20
|
177 |
+
min_length:
|
178 |
+
desc: null
|
179 |
+
value: 0
|
180 |
+
do_sample:
|
181 |
+
desc: null
|
182 |
+
value: false
|
183 |
+
early_stopping:
|
184 |
+
desc: null
|
185 |
+
value: false
|
186 |
+
num_beams:
|
187 |
+
desc: null
|
188 |
+
value: 1
|
189 |
+
num_beam_groups:
|
190 |
+
desc: null
|
191 |
+
value: 1
|
192 |
+
diversity_penalty:
|
193 |
+
desc: null
|
194 |
+
value: 0.0
|
195 |
+
temperature:
|
196 |
+
desc: null
|
197 |
+
value: 1.0
|
198 |
+
top_k:
|
199 |
+
desc: null
|
200 |
+
value: 50
|
201 |
+
top_p:
|
202 |
+
desc: null
|
203 |
+
value: 1.0
|
204 |
+
typical_p:
|
205 |
+
desc: null
|
206 |
+
value: 1.0
|
207 |
+
repetition_penalty:
|
208 |
+
desc: null
|
209 |
+
value: 1.0
|
210 |
+
length_penalty:
|
211 |
+
desc: null
|
212 |
+
value: 1.0
|
213 |
+
no_repeat_ngram_size:
|
214 |
+
desc: null
|
215 |
+
value: 0
|
216 |
+
encoder_no_repeat_ngram_size:
|
217 |
+
desc: null
|
218 |
+
value: 0
|
219 |
+
bad_words_ids:
|
220 |
+
desc: null
|
221 |
+
value: null
|
222 |
+
num_return_sequences:
|
223 |
+
desc: null
|
224 |
+
value: 1
|
225 |
+
output_scores:
|
226 |
+
desc: null
|
227 |
+
value: false
|
228 |
+
return_dict_in_generate:
|
229 |
+
desc: null
|
230 |
+
value: false
|
231 |
+
forced_bos_token_id:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
forced_eos_token_id:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_invalid_values:
|
238 |
+
desc: null
|
239 |
+
value: false
|
240 |
+
exponential_decay_length_penalty:
|
241 |
+
desc: null
|
242 |
+
value: null
|
243 |
+
suppress_tokens:
|
244 |
+
desc: null
|
245 |
+
value: null
|
246 |
+
begin_suppress_tokens:
|
247 |
+
desc: null
|
248 |
+
value: null
|
249 |
+
architectures:
|
250 |
+
desc: null
|
251 |
+
value:
|
252 |
+
- FalconForCausalLM
|
253 |
+
finetuning_task:
|
254 |
+
desc: null
|
255 |
+
value: null
|
256 |
+
id2label:
|
257 |
+
desc: null
|
258 |
+
value:
|
259 |
+
'0': LABEL_0
|
260 |
+
'1': LABEL_1
|
261 |
+
label2id:
|
262 |
+
desc: null
|
263 |
+
value:
|
264 |
+
LABEL_0: 0
|
265 |
+
LABEL_1: 1
|
266 |
+
tokenizer_class:
|
267 |
+
desc: null
|
268 |
+
value: null
|
269 |
+
prefix:
|
270 |
+
desc: null
|
271 |
+
value: null
|
272 |
+
pad_token_id:
|
273 |
+
desc: null
|
274 |
+
value: null
|
275 |
+
sep_token_id:
|
276 |
+
desc: null
|
277 |
+
value: null
|
278 |
+
decoder_start_token_id:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
task_specific_params:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
problem_type:
|
285 |
+
desc: null
|
286 |
+
value: null
|
287 |
+
_name_or_path:
|
288 |
+
desc: null
|
289 |
+
value: tiiuae/falcon-7b
|
290 |
+
transformers_version:
|
291 |
+
desc: null
|
292 |
+
value: 4.41.1
|
293 |
+
apply_residual_connection_post_layernorm:
|
294 |
+
desc: null
|
295 |
+
value: false
|
296 |
+
auto_map:
|
297 |
+
desc: null
|
298 |
+
value:
|
299 |
+
AutoConfig: tiiuae/falcon-7b--configuration_falcon.FalconConfig
|
300 |
+
AutoModel: tiiuae/falcon-7b--modeling_falcon.FalconModel
|
301 |
+
AutoModelForSequenceClassification: tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification
|
302 |
+
AutoModelForTokenClassification: tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification
|
303 |
+
AutoModelForQuestionAnswering: tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering
|
304 |
+
AutoModelForCausalLM: tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM
|
305 |
+
model_type:
|
306 |
+
desc: null
|
307 |
+
value: falcon
|
308 |
+
quantization_config:
|
309 |
+
desc: null
|
310 |
+
value:
|
311 |
+
quant_method: QuantizationMethod.BITS_AND_BYTES
|
312 |
+
_load_in_8bit: false
|
313 |
+
_load_in_4bit: true
|
314 |
+
llm_int8_threshold: 6.0
|
315 |
+
llm_int8_skip_modules: null
|
316 |
+
llm_int8_enable_fp32_cpu_offload: false
|
317 |
+
llm_int8_has_fp16_weight: false
|
318 |
+
bnb_4bit_quant_type: nf4
|
319 |
+
bnb_4bit_use_double_quant: false
|
320 |
+
bnb_4bit_compute_dtype: bfloat16
|
321 |
+
bnb_4bit_quant_storage: uint8
|
322 |
+
load_in_4bit: true
|
323 |
+
load_in_8bit: false
|
324 |
+
output_dir:
|
325 |
+
desc: null
|
326 |
+
value: /kaggle/working/
|
327 |
+
overwrite_output_dir:
|
328 |
+
desc: null
|
329 |
+
value: false
|
330 |
+
do_train:
|
331 |
+
desc: null
|
332 |
+
value: false
|
333 |
+
do_eval:
|
334 |
+
desc: null
|
335 |
+
value: false
|
336 |
+
do_predict:
|
337 |
+
desc: null
|
338 |
+
value: false
|
339 |
+
eval_strategy:
|
340 |
+
desc: null
|
341 |
+
value: 'no'
|
342 |
+
prediction_loss_only:
|
343 |
+
desc: null
|
344 |
+
value: false
|
345 |
+
per_device_train_batch_size:
|
346 |
+
desc: null
|
347 |
+
value: 8
|
348 |
+
per_device_eval_batch_size:
|
349 |
+
desc: null
|
350 |
+
value: 8
|
351 |
+
per_gpu_train_batch_size:
|
352 |
+
desc: null
|
353 |
+
value: null
|
354 |
+
per_gpu_eval_batch_size:
|
355 |
+
desc: null
|
356 |
+
value: null
|
357 |
+
gradient_accumulation_steps:
|
358 |
+
desc: null
|
359 |
+
value: 1
|
360 |
+
eval_accumulation_steps:
|
361 |
+
desc: null
|
362 |
+
value: null
|
363 |
+
eval_delay:
|
364 |
+
desc: null
|
365 |
+
value: 0
|
366 |
+
learning_rate:
|
367 |
+
desc: null
|
368 |
+
value: 0.0002
|
369 |
+
weight_decay:
|
370 |
+
desc: null
|
371 |
+
value: 0.0
|
372 |
+
adam_beta1:
|
373 |
+
desc: null
|
374 |
+
value: 0.9
|
375 |
+
adam_beta2:
|
376 |
+
desc: null
|
377 |
+
value: 0.999
|
378 |
+
adam_epsilon:
|
379 |
+
desc: null
|
380 |
+
value: 1.0e-08
|
381 |
+
max_grad_norm:
|
382 |
+
desc: null
|
383 |
+
value: 1.0
|
384 |
+
num_train_epochs:
|
385 |
+
desc: null
|
386 |
+
value: 20
|
387 |
+
max_steps:
|
388 |
+
desc: null
|
389 |
+
value: -1
|
390 |
+
lr_scheduler_type:
|
391 |
+
desc: null
|
392 |
+
value: linear
|
393 |
+
lr_scheduler_kwargs:
|
394 |
+
desc: null
|
395 |
+
value: {}
|
396 |
+
warmup_ratio:
|
397 |
+
desc: null
|
398 |
+
value: 0.0
|
399 |
+
warmup_steps:
|
400 |
+
desc: null
|
401 |
+
value: 0
|
402 |
+
log_level:
|
403 |
+
desc: null
|
404 |
+
value: passive
|
405 |
+
log_level_replica:
|
406 |
+
desc: null
|
407 |
+
value: warning
|
408 |
+
log_on_each_node:
|
409 |
+
desc: null
|
410 |
+
value: true
|
411 |
+
logging_dir:
|
412 |
+
desc: null
|
413 |
+
value: /kaggle/working/runs/Jun03_20-50-07_f28ebe0d2526
|
414 |
+
logging_strategy:
|
415 |
+
desc: null
|
416 |
+
value: steps
|
417 |
+
logging_first_step:
|
418 |
+
desc: null
|
419 |
+
value: false
|
420 |
+
logging_steps:
|
421 |
+
desc: null
|
422 |
+
value: 10
|
423 |
+
logging_nan_inf_filter:
|
424 |
+
desc: null
|
425 |
+
value: true
|
426 |
+
save_strategy:
|
427 |
+
desc: null
|
428 |
+
value: epoch
|
429 |
+
save_steps:
|
430 |
+
desc: null
|
431 |
+
value: 500
|
432 |
+
save_total_limit:
|
433 |
+
desc: null
|
434 |
+
value: 4
|
435 |
+
save_safetensors:
|
436 |
+
desc: null
|
437 |
+
value: true
|
438 |
+
save_on_each_node:
|
439 |
+
desc: null
|
440 |
+
value: false
|
441 |
+
save_only_model:
|
442 |
+
desc: null
|
443 |
+
value: false
|
444 |
+
restore_callback_states_from_checkpoint:
|
445 |
+
desc: null
|
446 |
+
value: false
|
447 |
+
no_cuda:
|
448 |
+
desc: null
|
449 |
+
value: false
|
450 |
+
use_cpu:
|
451 |
+
desc: null
|
452 |
+
value: false
|
453 |
+
use_mps_device:
|
454 |
+
desc: null
|
455 |
+
value: false
|
456 |
+
seed:
|
457 |
+
desc: null
|
458 |
+
value: 42
|
459 |
+
data_seed:
|
460 |
+
desc: null
|
461 |
+
value: null
|
462 |
+
jit_mode_eval:
|
463 |
+
desc: null
|
464 |
+
value: false
|
465 |
+
use_ipex:
|
466 |
+
desc: null
|
467 |
+
value: false
|
468 |
+
bf16:
|
469 |
+
desc: null
|
470 |
+
value: false
|
471 |
+
fp16:
|
472 |
+
desc: null
|
473 |
+
value: true
|
474 |
+
fp16_opt_level:
|
475 |
+
desc: null
|
476 |
+
value: O1
|
477 |
+
half_precision_backend:
|
478 |
+
desc: null
|
479 |
+
value: auto
|
480 |
+
bf16_full_eval:
|
481 |
+
desc: null
|
482 |
+
value: false
|
483 |
+
fp16_full_eval:
|
484 |
+
desc: null
|
485 |
+
value: false
|
486 |
+
tf32:
|
487 |
+
desc: null
|
488 |
+
value: null
|
489 |
+
local_rank:
|
490 |
+
desc: null
|
491 |
+
value: 0
|
492 |
+
ddp_backend:
|
493 |
+
desc: null
|
494 |
+
value: null
|
495 |
+
tpu_num_cores:
|
496 |
+
desc: null
|
497 |
+
value: null
|
498 |
+
tpu_metrics_debug:
|
499 |
+
desc: null
|
500 |
+
value: false
|
501 |
+
debug:
|
502 |
+
desc: null
|
503 |
+
value: []
|
504 |
+
dataloader_drop_last:
|
505 |
+
desc: null
|
506 |
+
value: false
|
507 |
+
eval_steps:
|
508 |
+
desc: null
|
509 |
+
value: null
|
510 |
+
dataloader_num_workers:
|
511 |
+
desc: null
|
512 |
+
value: 0
|
513 |
+
dataloader_prefetch_factor:
|
514 |
+
desc: null
|
515 |
+
value: null
|
516 |
+
past_index:
|
517 |
+
desc: null
|
518 |
+
value: -1
|
519 |
+
run_name:
|
520 |
+
desc: null
|
521 |
+
value: /kaggle/working/
|
522 |
+
disable_tqdm:
|
523 |
+
desc: null
|
524 |
+
value: false
|
525 |
+
remove_unused_columns:
|
526 |
+
desc: null
|
527 |
+
value: true
|
528 |
+
label_names:
|
529 |
+
desc: null
|
530 |
+
value: null
|
531 |
+
load_best_model_at_end:
|
532 |
+
desc: null
|
533 |
+
value: false
|
534 |
+
metric_for_best_model:
|
535 |
+
desc: null
|
536 |
+
value: null
|
537 |
+
greater_is_better:
|
538 |
+
desc: null
|
539 |
+
value: null
|
540 |
+
ignore_data_skip:
|
541 |
+
desc: null
|
542 |
+
value: false
|
543 |
+
fsdp:
|
544 |
+
desc: null
|
545 |
+
value: []
|
546 |
+
fsdp_min_num_params:
|
547 |
+
desc: null
|
548 |
+
value: 0
|
549 |
+
fsdp_config:
|
550 |
+
desc: null
|
551 |
+
value:
|
552 |
+
min_num_params: 0
|
553 |
+
xla: false
|
554 |
+
xla_fsdp_v2: false
|
555 |
+
xla_fsdp_grad_ckpt: false
|
556 |
+
fsdp_transformer_layer_cls_to_wrap:
|
557 |
+
desc: null
|
558 |
+
value: null
|
559 |
+
accelerator_config:
|
560 |
+
desc: null
|
561 |
+
value:
|
562 |
+
split_batches: false
|
563 |
+
dispatch_batches: null
|
564 |
+
even_batches: true
|
565 |
+
use_seedable_sampler: true
|
566 |
+
non_blocking: false
|
567 |
+
gradient_accumulation_kwargs: null
|
568 |
+
deepspeed:
|
569 |
+
desc: null
|
570 |
+
value: null
|
571 |
+
label_smoothing_factor:
|
572 |
+
desc: null
|
573 |
+
value: 0.0
|
574 |
+
optim:
|
575 |
+
desc: null
|
576 |
+
value: adamw_torch
|
577 |
+
optim_args:
|
578 |
+
desc: null
|
579 |
+
value: null
|
580 |
+
adafactor:
|
581 |
+
desc: null
|
582 |
+
value: false
|
583 |
+
group_by_length:
|
584 |
+
desc: null
|
585 |
+
value: false
|
586 |
+
length_column_name:
|
587 |
+
desc: null
|
588 |
+
value: length
|
589 |
+
report_to:
|
590 |
+
desc: null
|
591 |
+
value:
|
592 |
+
- tensorboard
|
593 |
+
- wandb
|
594 |
+
ddp_find_unused_parameters:
|
595 |
+
desc: null
|
596 |
+
value: null
|
597 |
+
ddp_bucket_cap_mb:
|
598 |
+
desc: null
|
599 |
+
value: null
|
600 |
+
ddp_broadcast_buffers:
|
601 |
+
desc: null
|
602 |
+
value: null
|
603 |
+
dataloader_pin_memory:
|
604 |
+
desc: null
|
605 |
+
value: true
|
606 |
+
dataloader_persistent_workers:
|
607 |
+
desc: null
|
608 |
+
value: false
|
609 |
+
skip_memory_metrics:
|
610 |
+
desc: null
|
611 |
+
value: true
|
612 |
+
use_legacy_prediction_loop:
|
613 |
+
desc: null
|
614 |
+
value: false
|
615 |
+
push_to_hub:
|
616 |
+
desc: null
|
617 |
+
value: true
|
618 |
+
resume_from_checkpoint:
|
619 |
+
desc: null
|
620 |
+
value: null
|
621 |
+
hub_model_id:
|
622 |
+
desc: null
|
623 |
+
value: othmanfa/fsttModel
|
624 |
+
hub_strategy:
|
625 |
+
desc: null
|
626 |
+
value: every_save
|
627 |
+
hub_token:
|
628 |
+
desc: null
|
629 |
+
value: <HUB_TOKEN>
|
630 |
+
hub_private_repo:
|
631 |
+
desc: null
|
632 |
+
value: false
|
633 |
+
hub_always_push:
|
634 |
+
desc: null
|
635 |
+
value: false
|
636 |
+
gradient_checkpointing:
|
637 |
+
desc: null
|
638 |
+
value: false
|
639 |
+
gradient_checkpointing_kwargs:
|
640 |
+
desc: null
|
641 |
+
value: null
|
642 |
+
include_inputs_for_metrics:
|
643 |
+
desc: null
|
644 |
+
value: false
|
645 |
+
eval_do_concat_batches:
|
646 |
+
desc: null
|
647 |
+
value: true
|
648 |
+
fp16_backend:
|
649 |
+
desc: null
|
650 |
+
value: auto
|
651 |
+
evaluation_strategy:
|
652 |
+
desc: null
|
653 |
+
value: null
|
654 |
+
push_to_hub_model_id:
|
655 |
+
desc: null
|
656 |
+
value: null
|
657 |
+
push_to_hub_organization:
|
658 |
+
desc: null
|
659 |
+
value: null
|
660 |
+
push_to_hub_token:
|
661 |
+
desc: null
|
662 |
+
value: <PUSH_TO_HUB_TOKEN>
|
663 |
+
mp_parameters:
|
664 |
+
desc: null
|
665 |
+
value: ''
|
666 |
+
auto_find_batch_size:
|
667 |
+
desc: null
|
668 |
+
value: true
|
669 |
+
full_determinism:
|
670 |
+
desc: null
|
671 |
+
value: false
|
672 |
+
torchdynamo:
|
673 |
+
desc: null
|
674 |
+
value: null
|
675 |
+
ray_scope:
|
676 |
+
desc: null
|
677 |
+
value: last
|
678 |
+
ddp_timeout:
|
679 |
+
desc: null
|
680 |
+
value: 1800
|
681 |
+
torch_compile:
|
682 |
+
desc: null
|
683 |
+
value: false
|
684 |
+
torch_compile_backend:
|
685 |
+
desc: null
|
686 |
+
value: null
|
687 |
+
torch_compile_mode:
|
688 |
+
desc: null
|
689 |
+
value: null
|
690 |
+
dispatch_batches:
|
691 |
+
desc: null
|
692 |
+
value: null
|
693 |
+
split_batches:
|
694 |
+
desc: null
|
695 |
+
value: null
|
696 |
+
include_tokens_per_second:
|
697 |
+
desc: null
|
698 |
+
value: false
|
699 |
+
include_num_input_tokens_seen:
|
700 |
+
desc: null
|
701 |
+
value: false
|
702 |
+
neftune_noise_alpha:
|
703 |
+
desc: null
|
704 |
+
value: null
|
705 |
+
optim_target_modules:
|
706 |
+
desc: null
|
707 |
+
value: null
|
708 |
+
batch_eval_metrics:
|
709 |
+
desc: null
|
710 |
+
value: false
|
wandb/run-20240603_175449-d191dh7n/files/output.log
ADDED
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
2 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
3 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
4 |
+
warnings.warn(
|
5 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
6 |
+
warnings.warn(
|
7 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
8 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
9 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
10 |
+
warnings.warn(
|
11 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
12 |
+
warnings.warn(
|
13 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
14 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
15 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
16 |
+
warnings.warn(
|
17 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
18 |
+
warnings.warn(
|
19 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
20 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
21 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
22 |
+
warnings.warn(
|
23 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
24 |
+
warnings.warn(
|
25 |
+
2
|
26 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
27 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
28 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
29 |
+
warnings.warn(
|
30 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
31 |
+
warnings.warn(
|
32 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
33 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
34 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
35 |
+
warnings.warn(
|
36 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
37 |
+
warnings.warn(
|
38 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
39 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
40 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
41 |
+
warnings.warn(
|
42 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
43 |
+
warnings.warn(
|
44 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
45 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
46 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
47 |
+
warnings.warn(
|
48 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
49 |
+
warnings.warn(
|
50 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
51 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
52 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
53 |
+
warnings.warn(
|
54 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
55 |
+
warnings.warn(
|
56 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
57 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
58 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
59 |
+
warnings.warn(
|
60 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
61 |
+
warnings.warn(
|
62 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
63 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
64 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
65 |
+
warnings.warn(
|
66 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
67 |
+
warnings.warn(
|
68 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
69 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
70 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
71 |
+
warnings.warn(
|
72 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
73 |
+
warnings.warn(
|
74 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
75 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
76 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
77 |
+
warnings.warn(
|
78 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
79 |
+
warnings.warn(
|
80 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
81 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
82 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
83 |
+
warnings.warn(
|
84 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
85 |
+
warnings.warn(
|
86 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
87 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
88 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
89 |
+
warnings.warn(
|
90 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
91 |
+
warnings.warn(
|
92 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
93 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
94 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
95 |
+
warnings.warn(
|
96 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
97 |
+
warnings.warn(
|
98 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
99 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
100 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
101 |
+
warnings.warn(
|
102 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
103 |
+
warnings.warn(
|
104 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
105 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
106 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
107 |
+
warnings.warn(
|
108 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
109 |
+
warnings.warn(
|
110 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
111 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
112 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
113 |
+
warnings.warn(
|
114 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
115 |
+
warnings.warn(
|
116 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
117 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
118 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
119 |
+
warnings.warn(
|
120 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
121 |
+
warnings.warn(
|
122 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
123 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
124 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
125 |
+
warnings.warn(
|
126 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
127 |
+
warnings.warn(
|
128 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
129 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
130 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
131 |
+
warnings.warn(
|
132 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
133 |
+
warnings.warn(
|
134 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
135 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
136 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
137 |
+
warnings.warn(
|
138 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
139 |
+
warnings.warn(
|
140 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
141 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
142 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
143 |
+
warnings.warn(
|
144 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
145 |
+
warnings.warn(
|
146 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
147 |
+
warnings.warn(
|
148 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
149 |
+
warnings.warn(
|
150 |
+
/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1659: UserWarning: You are calling .generate() with the `input_ids` being on a device type different than your model's device. `input_ids` is on cpu, whereas the model is on cuda. You may experience unexpected behaviors or slower generation. Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('cuda') before running `.generate()`.
|
151 |
+
warnings.warn(
|
152 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
153 |
+
warnings.warn(
|
154 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None
|
155 |
+
warnings.warn(
|
156 |
+
/opt/conda/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:515: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
|
157 |
+
warnings.warn(
|
158 |
+
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
|
159 |
+
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
|
160 |
+
quel est le type de formation de filiere Analyse Appliquée et Ingénierie Statistique (AAS)?
|
161 |
+
Bonjour,
|
162 |
+
Je suis actuellement en 2ème année de licence mathématiques et je souhaite intégrer une formation d'Analyse Appliquée et Ingénierie Statistique.
|
163 |
+
J'ai entendu dire que cette formation était très difficile et qu'il fallait avoir un très bon niveau en mathématiques.
|
164 |
+
Est
|
165 |
+
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
|
166 |
+
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
|
167 |
+
Type Formation: master
|
168 |
+
Nom Filiere: Analyse Appliquée et Ingénierie Statistique
|
169 |
+
Objectifs:
|
170 |
+
Le Master Analyse Appliquée et Ingénierie Statistique (MAAIS) de l'Université Abdelmalek Essaâdi a pour objectif de former des ingénieurs capables d’apporter une réponse scientifique et technologique à des questions issues de secteurs différents (Industrie, Finance, Santé, Informatique, Marketing, etc.). Les ingénieurs diplômés de la filière MAAIS seront reconnus pour leur capacité d’innovation, leur esprit d’entreprise et leur ouverture sur le monde.
|
171 |
+
Avec la multiplication des données issues de l’internet des objets, de l’analyse du comportement des consommateurs, des données médicales, des systèmes de trading haute fréquence etc., et la nécessité d’intégrer des technologies analyt
|
172 |
+
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
|
173 |
+
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
|
174 |
+
Type Formation: master
|
175 |
+
Nom Filiere: Analyse Appliquée et Ingénierie Statistique
|
176 |
+
Programme:
|
177 |
+
Le cursus mathématiques appliquée et statistique vise à former des ingénieurs capables de faire la synthèse entre les mathématiques et les applications avec comme objectifs :
|
178 |
+
- De former des ingénieurs polyvalents, ayant des compétences en Mathématiques, et en même temps aptes à faire face à des problèmes de type scientifique et industrielle
|
179 |
+
- D’enseigner des mathématiques appliquées et statistique à l’enseignement secondaire
|
180 |
+
Programme:
|
181 |
+
Mathématiques et Calcul Scientifique : : MASP1 (développement d’applications sur ordinateur et méthodes de résolution des équations et optimisation) : MASP2 (probabilités, théorie des
|
182 |
+
Time to retrieve answer: 205.20784872600052
|
183 |
+
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
|
184 |
+
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
|
185 |
+
Type Formation: master
|
186 |
+
Nom Filiere: Analyse Appliquée et Ingénierie Statistique
|
187 |
+
Programme:
|
188 |
+
Nom Module:
|
189 |
+
Options: Communication de la recherche
|
190 |
+
Analyse statistique
|
191 |
+
Modélisation
|
192 |
+
Data Mining
|
193 |
+
Analyse de données
|
194 |
+
Optimisation
|
195 |
+
Data Science
|
196 |
+
Entreprendre son projet
|
197 |
+
|
198 |
+
Date: 01 sept 2022 au 31 août 2024 semestre 2 : Analyse statistique
|
199 |
+
et données numériques
|
200 |
+
et outils
|
201 |
+
d’optimisation
|
202 |
+
et d’estimation
|
203 |
+
Time to retrieve answer: 205.3583044219995
|
204 |
+
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
|
205 |
+
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
|
206 |
+
le nom de coordinateur de master Analyse Appliquée et Ingénierie Statistique est M.
|
207 |
+
El-Gharbawi. Il a obtenu son doctorat en mathématiques appliquées et sa spécialisation est l’Analyse des Données en 2007 à l’université de Montréal au Canada. Il a aussi obtenu un diplôme d’expert en données massives en 2019.
|
208 |
+
Il a également obtenu un diplôme de master en Analyse des données massives en 2022.
|
209 |
+
Il a également obtenu un master en statistiques et analyse des données en 2022.
|
210 |
+
Il a également obtenu un master en statistique des données massives en 2022.
|
211 |
+
Enfin, il a obtenu un master en analyse des données massives en 2022.
|
212 |
+
Il est également responsable de l’équipe de recherche en Analyse des données massives.
|
213 |
+
Il est également responsable
|
214 |
+
Time to retrieve answer: 211.99833258299986
|
215 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
216 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
217 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
218 |
+
warnings.warn(
|
219 |
+
/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
|
220 |
+
warnings.warn(
|
221 |
+
/tmp/ipykernel_34/3516238434.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
|
222 |
+
return {key: torch.tensor(val[idx]) for key, val in self.examples.items()}
|
223 |
+
/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
|
wandb/run-20240603_175449-d191dh7n/files/requirements.txt
ADDED
@@ -0,0 +1,870 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
==
|
2 |
+
Babel==2.14.0
|
3 |
+
Boruta==0.3
|
4 |
+
Brotli==1.1.0
|
5 |
+
CVXcanon==0.1.2
|
6 |
+
Cartopy==0.23.0
|
7 |
+
Cython==3.0.8
|
8 |
+
Deprecated==1.2.14
|
9 |
+
Farama-Notifications==0.0.4
|
10 |
+
Flask==3.0.3
|
11 |
+
Geohash==1.0
|
12 |
+
GitPython==3.1.41
|
13 |
+
ImageHash==4.3.1
|
14 |
+
Janome==0.5.0
|
15 |
+
Jinja2==3.1.2
|
16 |
+
LunarCalendar==0.0.9
|
17 |
+
Mako==1.3.5
|
18 |
+
Markdown==3.5.2
|
19 |
+
MarkupSafe==2.1.3
|
20 |
+
MarkupSafe==2.1.5
|
21 |
+
Pillow==9.5.0
|
22 |
+
PuLP==2.8.0
|
23 |
+
PyArabic==0.6.15
|
24 |
+
PyJWT==2.8.0
|
25 |
+
PyMeeus==0.5.12
|
26 |
+
PySocks==1.7.1
|
27 |
+
PyUpSet==0.1.1.post7
|
28 |
+
PyWavelets==1.5.0
|
29 |
+
PyYAML==6.0.1
|
30 |
+
Pygments==2.17.2
|
31 |
+
Pympler==1.0.1
|
32 |
+
QtPy==2.4.1
|
33 |
+
Rtree==1.2.0
|
34 |
+
SQLAlchemy==2.0.25
|
35 |
+
SecretStorage==3.3.3
|
36 |
+
Send2Trash==1.8.2
|
37 |
+
Shapely==1.8.5.post1
|
38 |
+
Shimmy==1.3.0
|
39 |
+
SimpleITK==2.3.1
|
40 |
+
TPOT==0.12.1
|
41 |
+
Theano-PyMC==1.1.2
|
42 |
+
Theano==1.0.5
|
43 |
+
Wand==0.6.13
|
44 |
+
Werkzeug==3.0.3
|
45 |
+
absl-py==1.4.0
|
46 |
+
accelerate==0.30.1
|
47 |
+
access==1.1.9
|
48 |
+
affine==2.4.0
|
49 |
+
aiobotocore==2.13.0
|
50 |
+
aiofiles==22.1.0
|
51 |
+
aiohttp-cors==0.7.0
|
52 |
+
aiohttp==3.9.5
|
53 |
+
aioitertools==0.11.0
|
54 |
+
aiorwlock==1.3.0
|
55 |
+
aiosignal==1.3.1
|
56 |
+
aiosqlite==0.19.0
|
57 |
+
albumentations==1.4.0
|
58 |
+
alembic==1.13.1
|
59 |
+
altair==5.3.0
|
60 |
+
annotated-types==0.6.0
|
61 |
+
annotated-types==0.7.0
|
62 |
+
annoy==1.17.3
|
63 |
+
anyio==4.2.0
|
64 |
+
apache-beam==2.46.0
|
65 |
+
aplus==0.11.0
|
66 |
+
appdirs==1.4.4
|
67 |
+
archspec==0.2.3
|
68 |
+
argon2-cffi-bindings==21.2.0
|
69 |
+
argon2-cffi==23.1.0
|
70 |
+
array-record==0.5.0
|
71 |
+
arrow==1.3.0
|
72 |
+
arviz==0.18.0
|
73 |
+
astroid==3.2.2
|
74 |
+
astropy-iers-data==0.2024.5.27.0.30.8
|
75 |
+
astropy==6.1.0
|
76 |
+
asttokens==2.4.1
|
77 |
+
astunparse==1.6.3
|
78 |
+
async-lru==2.0.4
|
79 |
+
async-timeout==4.0.3
|
80 |
+
attrs==23.2.0
|
81 |
+
audioread==3.0.1
|
82 |
+
autopep8==2.0.4
|
83 |
+
backoff==2.2.1
|
84 |
+
bayesian-optimization==1.4.3
|
85 |
+
beatrix_jupyterlab==2023.128.151533
|
86 |
+
beautifulsoup4==4.12.2
|
87 |
+
bitsandbytes==0.43.1
|
88 |
+
blake3==0.2.1
|
89 |
+
bleach==6.1.0
|
90 |
+
blessed==1.20.0
|
91 |
+
blinker==1.8.2
|
92 |
+
blis==0.7.10
|
93 |
+
blosc2==2.6.2
|
94 |
+
bokeh==3.4.1
|
95 |
+
boltons==23.1.1
|
96 |
+
boto3==1.26.100
|
97 |
+
botocore==1.34.106
|
98 |
+
bq_helper==0.4.1
|
99 |
+
bqplot==0.12.43
|
100 |
+
branca==0.7.2
|
101 |
+
brewer2mpl==1.4.1
|
102 |
+
brotlipy==0.7.0
|
103 |
+
cached-property==1.5.2
|
104 |
+
cachetools==4.2.4
|
105 |
+
cachetools==5.3.2
|
106 |
+
catalogue==2.0.10
|
107 |
+
catalyst==22.4
|
108 |
+
catboost==1.2.5
|
109 |
+
category-encoders==2.6.3
|
110 |
+
certifi==2024.2.2
|
111 |
+
cesium==0.12.1
|
112 |
+
cffi==1.16.0
|
113 |
+
charset-normalizer==3.3.2
|
114 |
+
chex==0.1.86
|
115 |
+
cleverhans==4.0.0
|
116 |
+
click-plugins==1.1.1
|
117 |
+
click==8.1.7
|
118 |
+
cligj==0.7.2
|
119 |
+
cloud-tpu-client==0.10
|
120 |
+
cloud-tpu-profiler==2.4.0
|
121 |
+
cloudpathlib==0.16.0
|
122 |
+
cloudpickle==2.2.1
|
123 |
+
cloudpickle==3.0.0
|
124 |
+
cmdstanpy==1.2.2
|
125 |
+
colorama==0.4.6
|
126 |
+
colorcet==3.1.0
|
127 |
+
colorful==0.5.6
|
128 |
+
colorlog==6.8.2
|
129 |
+
colorlover==0.3.0
|
130 |
+
comm==0.2.1
|
131 |
+
conda-libmamba-solver==23.12.0
|
132 |
+
conda-package-handling==2.2.0
|
133 |
+
conda==24.5.0
|
134 |
+
conda_package_streaming==0.9.0
|
135 |
+
confection==0.1.4
|
136 |
+
contextily==1.6.0
|
137 |
+
contourpy==1.2.0
|
138 |
+
contourpy==1.2.1
|
139 |
+
convertdate==2.4.0
|
140 |
+
crcmod==1.7
|
141 |
+
cryptography==41.0.7
|
142 |
+
cuda-python==12.5.0
|
143 |
+
cudf==24.4.1
|
144 |
+
cufflinks==0.17.3
|
145 |
+
cuml==24.4.0
|
146 |
+
cupy==13.1.0
|
147 |
+
cycler==0.12.1
|
148 |
+
cymem==2.0.8
|
149 |
+
cytoolz==0.12.3
|
150 |
+
daal4py==2024.4.0
|
151 |
+
daal==2024.4.0
|
152 |
+
dacite==1.8.1
|
153 |
+
dask-cuda==24.4.0
|
154 |
+
dask-cudf==24.4.1
|
155 |
+
dask-expr==1.1.1
|
156 |
+
dask==2024.5.1
|
157 |
+
dataclasses-json==0.6.6
|
158 |
+
dataproc_jupyter_plugin==0.1.66
|
159 |
+
datasets==2.19.1
|
160 |
+
datashader==0.16.1
|
161 |
+
datatile==1.0.3
|
162 |
+
db-dtypes==1.2.0
|
163 |
+
deap==1.4.1
|
164 |
+
debugpy==1.8.0
|
165 |
+
decorator==5.1.1
|
166 |
+
deepdiff==7.0.1
|
167 |
+
defusedxml==0.7.1
|
168 |
+
deprecation==2.1.0
|
169 |
+
descartes==1.1.0
|
170 |
+
dill==0.3.8
|
171 |
+
dipy==1.9.0
|
172 |
+
distlib==0.3.8
|
173 |
+
distributed==2024.1.1
|
174 |
+
distro==1.9.0
|
175 |
+
dm-tree==0.1.8
|
176 |
+
docker-pycreds==0.4.0
|
177 |
+
docker==7.0.0
|
178 |
+
docopt==0.6.2
|
179 |
+
docstring-parser==0.15
|
180 |
+
docstring-to-markdown==0.15
|
181 |
+
docutils==0.21.2
|
182 |
+
earthengine-api==0.1.404
|
183 |
+
easydict==1.13
|
184 |
+
easyocr==1.7.1
|
185 |
+
ecos==2.0.13
|
186 |
+
eli5==0.13.0
|
187 |
+
emoji==2.12.1
|
188 |
+
en-core-web-lg==3.7.1
|
189 |
+
en-core-web-sm==3.7.1
|
190 |
+
entrypoints==0.4
|
191 |
+
ephem==4.1.5
|
192 |
+
esda==2.5.1
|
193 |
+
essentia==2.1b6.dev1110
|
194 |
+
et-xmlfile==1.1.0
|
195 |
+
etils==1.6.0
|
196 |
+
exceptiongroup==1.2.0
|
197 |
+
executing==2.0.1
|
198 |
+
explainable-ai-sdk==1.3.3
|
199 |
+
fastai==2.7.15
|
200 |
+
fastapi==0.108.0
|
201 |
+
fastavro==1.9.3
|
202 |
+
fastcore==1.5.41
|
203 |
+
fastdownload==0.0.7
|
204 |
+
fasteners==0.19
|
205 |
+
fastjsonschema==2.19.1
|
206 |
+
fastprogress==1.0.3
|
207 |
+
fastrlock==0.8.2
|
208 |
+
fasttext==0.9.2
|
209 |
+
feather-format==0.4.1
|
210 |
+
featuretools==1.31.0
|
211 |
+
filelock==3.13.1
|
212 |
+
fiona==1.9.6
|
213 |
+
fitter==1.7.0
|
214 |
+
flake8==7.0.0
|
215 |
+
flashtext==2.7
|
216 |
+
flatbuffers==23.5.26
|
217 |
+
flax==0.8.4
|
218 |
+
folium==0.16.0
|
219 |
+
fonttools==4.47.0
|
220 |
+
fonttools==4.52.4
|
221 |
+
fqdn==1.5.1
|
222 |
+
frozendict==2.4.4
|
223 |
+
frozenlist==1.4.1
|
224 |
+
fsspec==2024.3.1
|
225 |
+
fsspec==2024.5.0
|
226 |
+
funcy==2.0
|
227 |
+
fury==0.10.0
|
228 |
+
future==1.0.0
|
229 |
+
fuzzywuzzy==0.18.0
|
230 |
+
gast==0.5.4
|
231 |
+
gatspy==0.3
|
232 |
+
gcsfs==2024.3.1
|
233 |
+
gensim==4.3.2
|
234 |
+
geographiclib==2.0
|
235 |
+
geojson==3.1.0
|
236 |
+
geopandas==0.14.4
|
237 |
+
geoplot==0.5.1
|
238 |
+
geopy==2.4.1
|
239 |
+
geoviews==1.12.0
|
240 |
+
ggplot==0.11.5
|
241 |
+
giddy==2.3.5
|
242 |
+
gitdb==4.0.11
|
243 |
+
google-ai-generativelanguage==0.6.4
|
244 |
+
google-api-core==2.11.1
|
245 |
+
google-api-core==2.19.0
|
246 |
+
google-api-python-client==2.131.0
|
247 |
+
google-apitools==0.5.31
|
248 |
+
google-auth-httplib2==0.2.0
|
249 |
+
google-auth-oauthlib==1.2.0
|
250 |
+
google-auth==2.26.1
|
251 |
+
google-cloud-aiplatform==0.6.0a1
|
252 |
+
google-cloud-artifact-registry==1.10.0
|
253 |
+
google-cloud-automl==1.0.1
|
254 |
+
google-cloud-bigquery==2.34.4
|
255 |
+
google-cloud-bigtable==1.7.3
|
256 |
+
google-cloud-core==2.4.1
|
257 |
+
google-cloud-datastore==2.19.0
|
258 |
+
google-cloud-dlp==3.14.0
|
259 |
+
google-cloud-jupyter-config==0.0.5
|
260 |
+
google-cloud-language==2.13.3
|
261 |
+
google-cloud-monitoring==2.18.0
|
262 |
+
google-cloud-pubsub==2.19.0
|
263 |
+
google-cloud-pubsublite==1.9.0
|
264 |
+
google-cloud-recommendations-ai==0.7.1
|
265 |
+
google-cloud-resource-manager==1.11.0
|
266 |
+
google-cloud-spanner==3.40.1
|
267 |
+
google-cloud-storage==1.44.0
|
268 |
+
google-cloud-translate==3.12.1
|
269 |
+
google-cloud-videointelligence==2.13.3
|
270 |
+
google-cloud-vision==2.8.0
|
271 |
+
google-crc32c==1.5.0
|
272 |
+
google-generativeai==0.5.4
|
273 |
+
google-pasta==0.2.0
|
274 |
+
google-resumable-media==2.7.0
|
275 |
+
googleapis-common-protos==1.62.0
|
276 |
+
gplearn==0.4.2
|
277 |
+
gpustat==1.0.0
|
278 |
+
gpxpy==1.6.2
|
279 |
+
graphviz==0.20.3
|
280 |
+
greenlet==3.0.3
|
281 |
+
grpc-google-iam-v1==0.12.7
|
282 |
+
grpcio-status==1.48.1
|
283 |
+
grpcio-status==1.48.2
|
284 |
+
grpcio==1.59.3
|
285 |
+
grpcio==1.60.0
|
286 |
+
gviz-api==1.10.0
|
287 |
+
gym-notices==0.0.8
|
288 |
+
gym==0.26.2
|
289 |
+
gymnasium==0.29.0
|
290 |
+
h11==0.14.0
|
291 |
+
h2o==3.46.0.2
|
292 |
+
h5netcdf==1.3.0
|
293 |
+
h5py==3.10.0
|
294 |
+
haversine==2.8.1
|
295 |
+
hdfs==2.7.3
|
296 |
+
hep-ml==0.7.2
|
297 |
+
hijri-converter==2.3.1
|
298 |
+
hmmlearn==0.3.2
|
299 |
+
holidays==0.24
|
300 |
+
holoviews==1.18.3
|
301 |
+
hpsklearn==0.1.0
|
302 |
+
html5lib==1.1
|
303 |
+
htmlmin==0.1.12
|
304 |
+
httpcore==1.0.5
|
305 |
+
httplib2==0.21.0
|
306 |
+
httptools==0.6.1
|
307 |
+
httpx==0.27.0
|
308 |
+
huggingface-hub==0.23.2
|
309 |
+
hunspell==0.5.5
|
310 |
+
hydra-slayer==0.5.0
|
311 |
+
hyperopt==0.2.7
|
312 |
+
hypertools==0.8.0
|
313 |
+
idna==3.6
|
314 |
+
igraph==0.11.5
|
315 |
+
imagecodecs==2024.1.1
|
316 |
+
imageio==2.33.1
|
317 |
+
imbalanced-learn==0.12.3
|
318 |
+
imgaug==0.4.0
|
319 |
+
importlib-metadata==6.11.0
|
320 |
+
importlib-metadata==7.0.1
|
321 |
+
importlib-resources==6.1.1
|
322 |
+
inequality==1.0.1
|
323 |
+
iniconfig==2.0.0
|
324 |
+
ipydatawidgets==4.3.5
|
325 |
+
ipykernel==6.28.0
|
326 |
+
ipyleaflet==0.19.1
|
327 |
+
ipympl==0.7.0
|
328 |
+
ipython-genutils==0.2.0
|
329 |
+
ipython-genutils==0.2.0
|
330 |
+
ipython-sql==0.5.0
|
331 |
+
ipython==8.20.0
|
332 |
+
ipyvolume==0.6.3
|
333 |
+
ipyvue==1.11.1
|
334 |
+
ipyvuetify==1.9.4
|
335 |
+
ipywebrtc==0.6.0
|
336 |
+
ipywidgets==7.7.1
|
337 |
+
isoduration==20.11.0
|
338 |
+
isort==5.13.2
|
339 |
+
isoweek==1.3.3
|
340 |
+
itsdangerous==2.2.0
|
341 |
+
jaraco.classes==3.3.0
|
342 |
+
jax-jumpy==1.0.0
|
343 |
+
jax==0.4.26
|
344 |
+
jaxlib==0.4.26.dev20240504
|
345 |
+
jedi==0.19.1
|
346 |
+
jeepney==0.8.0
|
347 |
+
jieba==0.42.1
|
348 |
+
jmespath==1.0.1
|
349 |
+
joblib==1.4.2
|
350 |
+
json5==0.9.14
|
351 |
+
jsonpatch==1.33
|
352 |
+
jsonpointer==2.4
|
353 |
+
jsonschema-specifications==2023.12.1
|
354 |
+
jsonschema==4.20.0
|
355 |
+
jupyter-console==6.6.3
|
356 |
+
jupyter-events==0.9.0
|
357 |
+
jupyter-http-over-ws==0.0.8
|
358 |
+
jupyter-leaflet==0.19.1
|
359 |
+
jupyter-lsp==1.5.1
|
360 |
+
jupyter-server-mathjax==0.2.6
|
361 |
+
jupyter-ydoc==0.2.5
|
362 |
+
jupyter_client==7.4.9
|
363 |
+
jupyter_client==8.6.0
|
364 |
+
jupyter_core==5.7.1
|
365 |
+
jupyter_server==2.12.5
|
366 |
+
jupyter_server_fileid==0.9.1
|
367 |
+
jupyter_server_proxy==4.1.0
|
368 |
+
jupyter_server_terminals==0.5.1
|
369 |
+
jupyter_server_ydoc==0.8.0
|
370 |
+
jupyterlab-lsp==5.1.0
|
371 |
+
jupyterlab-widgets==3.0.9
|
372 |
+
jupyterlab==4.2.1
|
373 |
+
jupyterlab_git==0.44.0
|
374 |
+
jupyterlab_pygments==0.3.0
|
375 |
+
jupyterlab_server==2.27.2
|
376 |
+
jupytext==1.16.0
|
377 |
+
kaggle-environments==1.14.9
|
378 |
+
kaggle==1.6.14
|
379 |
+
kagglehub==0.2.5
|
380 |
+
keras-cv==0.9.0
|
381 |
+
keras-nlp==0.12.1
|
382 |
+
keras-tuner==1.4.6
|
383 |
+
keras==3.3.3
|
384 |
+
kernels-mixer==0.0.7
|
385 |
+
keyring==24.3.0
|
386 |
+
keyrings.google-artifactregistry-auth==1.1.2
|
387 |
+
kfp-pipeline-spec==0.2.2
|
388 |
+
kfp-server-api==2.0.5
|
389 |
+
kfp==2.5.0
|
390 |
+
kiwisolver==1.4.5
|
391 |
+
kmapper==2.0.1
|
392 |
+
kmodes==0.12.2
|
393 |
+
korean-lunar-calendar==0.3.1
|
394 |
+
kornia==0.7.2
|
395 |
+
kornia_rs==0.1.3
|
396 |
+
kt-legacy==1.0.5
|
397 |
+
kubernetes==26.1.0
|
398 |
+
langcodes==3.4.0
|
399 |
+
langid==1.1.6
|
400 |
+
language_data==1.2.0
|
401 |
+
lazy_loader==0.3
|
402 |
+
learntools==0.3.4
|
403 |
+
leven==1.0.4
|
404 |
+
libclang==16.0.6
|
405 |
+
libmambapy==1.5.8
|
406 |
+
libpysal==4.9.2
|
407 |
+
librosa==0.10.2.post1
|
408 |
+
lightgbm==4.2.0
|
409 |
+
lightning-utilities==0.11.2
|
410 |
+
lime==0.2.0.1
|
411 |
+
line_profiler==4.1.3
|
412 |
+
linkify-it-py==2.0.3
|
413 |
+
llvmlite==0.41.1
|
414 |
+
llvmlite==0.42.0
|
415 |
+
lml==0.1.0
|
416 |
+
locket==1.0.0
|
417 |
+
loguru==0.7.2
|
418 |
+
lxml==5.2.2
|
419 |
+
lz4==4.3.3
|
420 |
+
mamba==1.5.8
|
421 |
+
mapclassify==2.6.1
|
422 |
+
marisa-trie==1.1.0
|
423 |
+
markdown-it-py==3.0.0
|
424 |
+
marshmallow==3.21.2
|
425 |
+
matplotlib-inline==0.1.6
|
426 |
+
matplotlib-venn==0.11.10
|
427 |
+
matplotlib==3.7.5
|
428 |
+
matplotlib==3.8.4
|
429 |
+
mccabe==0.7.0
|
430 |
+
mdit-py-plugins==0.4.0
|
431 |
+
mdurl==0.1.2
|
432 |
+
memory-profiler==0.61.0
|
433 |
+
menuinst==2.0.1
|
434 |
+
mercantile==1.2.1
|
435 |
+
mgwr==2.2.1
|
436 |
+
missingno==0.5.2
|
437 |
+
mistune==0.8.4
|
438 |
+
mizani==0.11.4
|
439 |
+
ml-dtypes==0.2.0
|
440 |
+
mlcrate==0.2.0
|
441 |
+
mlens==0.2.3
|
442 |
+
mlxtend==0.23.1
|
443 |
+
mne==1.7.0
|
444 |
+
mnist==0.2.2
|
445 |
+
momepy==0.7.0
|
446 |
+
more-itertools==10.2.0
|
447 |
+
mpld3==0.5.10
|
448 |
+
mpmath==1.3.0
|
449 |
+
msgpack==1.0.7
|
450 |
+
msgpack==1.0.8
|
451 |
+
multidict==6.0.4
|
452 |
+
multimethod==1.10
|
453 |
+
multipledispatch==1.0.0
|
454 |
+
multiprocess==0.70.16
|
455 |
+
munkres==1.1.4
|
456 |
+
murmurhash==1.0.10
|
457 |
+
mypy-extensions==1.0.0
|
458 |
+
namex==0.0.8
|
459 |
+
nb-conda-kernels==2.3.1
|
460 |
+
nb_conda==2.2.1
|
461 |
+
nbclassic==1.0.0
|
462 |
+
nbclient==0.5.13
|
463 |
+
nbconvert==6.4.5
|
464 |
+
nbdime==3.2.0
|
465 |
+
nbformat==5.9.2
|
466 |
+
ndindex==1.8
|
467 |
+
nest-asyncio==1.5.8
|
468 |
+
networkx==3.2.1
|
469 |
+
nibabel==5.2.1
|
470 |
+
nilearn==0.10.4
|
471 |
+
ninja==1.11.1.1
|
472 |
+
nltk==3.2.4
|
473 |
+
nose==1.3.7
|
474 |
+
notebook==6.5.4
|
475 |
+
notebook==6.5.6
|
476 |
+
notebook_executor==0.2
|
477 |
+
notebook_shim==0.2.3
|
478 |
+
numba==0.58.1
|
479 |
+
numba==0.59.1
|
480 |
+
numexpr==2.10.0
|
481 |
+
numpy==1.26.4
|
482 |
+
nvidia-ml-py==11.495.46
|
483 |
+
nvtx==0.2.10
|
484 |
+
oauth2client==4.1.3
|
485 |
+
oauthlib==3.2.2
|
486 |
+
objsize==0.6.1
|
487 |
+
odfpy==1.4.1
|
488 |
+
olefile==0.47
|
489 |
+
onnx==1.16.1
|
490 |
+
opencensus-context==0.1.3
|
491 |
+
opencensus==0.11.4
|
492 |
+
opencv-contrib-python==4.9.0.80
|
493 |
+
opencv-python-headless==4.9.0.80
|
494 |
+
opencv-python==4.9.0.80
|
495 |
+
openpyxl==3.1.2
|
496 |
+
openslide-python==1.3.1
|
497 |
+
opentelemetry-api==1.22.0
|
498 |
+
opentelemetry-exporter-otlp-proto-common==1.22.0
|
499 |
+
opentelemetry-exporter-otlp-proto-grpc==1.22.0
|
500 |
+
opentelemetry-exporter-otlp-proto-http==1.22.0
|
501 |
+
opentelemetry-exporter-otlp==1.22.0
|
502 |
+
opentelemetry-proto==1.22.0
|
503 |
+
opentelemetry-sdk==1.22.0
|
504 |
+
opentelemetry-semantic-conventions==0.43b0
|
505 |
+
opt-einsum==3.3.0
|
506 |
+
optax==0.2.2
|
507 |
+
optree==0.11.0
|
508 |
+
optuna==3.6.1
|
509 |
+
orbax-checkpoint==0.5.14
|
510 |
+
ordered-set==4.1.0
|
511 |
+
orjson==3.9.10
|
512 |
+
ortools==9.4.1874
|
513 |
+
osmnx==1.9.3
|
514 |
+
overrides==7.4.0
|
515 |
+
packaging==21.3
|
516 |
+
pandas-datareader==0.10.0
|
517 |
+
pandas-profiling==3.6.6
|
518 |
+
pandas-summary==0.2.0
|
519 |
+
pandas==2.2.1
|
520 |
+
pandas==2.2.2
|
521 |
+
pandasql==0.7.3
|
522 |
+
pandocfilters==1.5.0
|
523 |
+
panel==1.4.3
|
524 |
+
papermill==2.5.0
|
525 |
+
param==2.1.0
|
526 |
+
parso==0.8.3
|
527 |
+
partd==1.4.2
|
528 |
+
path.py==12.5.0
|
529 |
+
path==16.14.0
|
530 |
+
pathos==0.3.2
|
531 |
+
pathy==0.10.3
|
532 |
+
patsy==0.5.6
|
533 |
+
pdf2image==1.17.0
|
534 |
+
peft==0.11.1
|
535 |
+
pettingzoo==1.24.0
|
536 |
+
pexpect==4.8.0
|
537 |
+
pexpect==4.9.0
|
538 |
+
phik==0.12.4
|
539 |
+
pickleshare==0.7.5
|
540 |
+
pillow==10.3.0
|
541 |
+
pip==23.3.2
|
542 |
+
pkgutil_resolve_name==1.3.10
|
543 |
+
platformdirs==4.2.2
|
544 |
+
plotly-express==0.4.1
|
545 |
+
plotly==5.18.0
|
546 |
+
plotnine==0.13.6
|
547 |
+
pluggy==1.5.0
|
548 |
+
pointpats==2.4.0
|
549 |
+
polars==0.20.30
|
550 |
+
polyglot==16.7.4
|
551 |
+
pooch==1.8.1
|
552 |
+
pox==0.3.4
|
553 |
+
ppca==0.0.4
|
554 |
+
ppft==1.7.6.8
|
555 |
+
preprocessing==0.1.13
|
556 |
+
preshed==3.0.9
|
557 |
+
prettytable==3.9.0
|
558 |
+
progressbar2==4.4.2
|
559 |
+
prometheus-client==0.19.0
|
560 |
+
promise==2.3
|
561 |
+
prompt-toolkit==3.0.42
|
562 |
+
prompt-toolkit==3.0.43
|
563 |
+
prophet==1.1.1
|
564 |
+
proto-plus==1.23.0
|
565 |
+
protobuf==3.20.3
|
566 |
+
protobuf==4.24.4
|
567 |
+
psutil==5.9.3
|
568 |
+
psutil==5.9.7
|
569 |
+
ptyprocess==0.7.0
|
570 |
+
pudb==2024.1
|
571 |
+
pure-eval==0.2.2
|
572 |
+
py-cpuinfo==9.0.0
|
573 |
+
py-spy==0.3.14
|
574 |
+
py4j==0.10.9.7
|
575 |
+
pyLDAvis==3.4.1
|
576 |
+
pyOpenSSL==23.3.0
|
577 |
+
pyaml==24.4.0
|
578 |
+
pyarrow-hotfix==0.6
|
579 |
+
pyarrow==14.0.2
|
580 |
+
pyasn1-modules==0.3.0
|
581 |
+
pyasn1==0.5.1
|
582 |
+
pybind11==2.12.0
|
583 |
+
pyclipper==1.3.0.post5
|
584 |
+
pycodestyle==2.11.1
|
585 |
+
pycosat==0.6.6
|
586 |
+
pycparser==2.21
|
587 |
+
pycryptodome==3.20.0
|
588 |
+
pyct==0.5.0
|
589 |
+
pycuda==2024.1
|
590 |
+
pydantic==2.5.3
|
591 |
+
pydantic==2.7.2
|
592 |
+
pydantic_core==2.14.6
|
593 |
+
pydantic_core==2.18.3
|
594 |
+
pydegensac==0.1.2
|
595 |
+
pydicom==2.4.4
|
596 |
+
pydocstyle==6.3.0
|
597 |
+
pydot==1.4.2
|
598 |
+
pydub==0.25.1
|
599 |
+
pyemd==1.0.0
|
600 |
+
pyerfa==2.0.1.4
|
601 |
+
pyexcel-io==0.6.6
|
602 |
+
pyexcel-ods==0.6.0
|
603 |
+
pyflakes==3.2.0
|
604 |
+
pygltflib==1.16.2
|
605 |
+
pykalman==0.9.7
|
606 |
+
pylibraft==24.4.0
|
607 |
+
pylint==3.2.2
|
608 |
+
pymc3==3.11.4
|
609 |
+
pymongo==3.13.0
|
610 |
+
pynndescent==0.5.12
|
611 |
+
pynvjitlink==0.2.3
|
612 |
+
pynvml==11.4.1
|
613 |
+
pynvrtc==9.2
|
614 |
+
pyparsing==3.1.1
|
615 |
+
pyparsing==3.1.2
|
616 |
+
pypdf==4.2.0
|
617 |
+
pyproj==3.6.1
|
618 |
+
pysal==24.1
|
619 |
+
pyshp==2.3.1
|
620 |
+
pytesseract==0.3.10
|
621 |
+
pytest==8.2.1
|
622 |
+
python-bidi==0.4.2
|
623 |
+
python-dateutil==2.9.0.post0
|
624 |
+
python-dotenv==1.0.0
|
625 |
+
python-json-logger==2.0.7
|
626 |
+
python-louvain==0.16
|
627 |
+
python-lsp-jsonrpc==1.1.2
|
628 |
+
python-lsp-server==1.11.0
|
629 |
+
python-slugify==8.0.4
|
630 |
+
python-utils==3.8.2
|
631 |
+
pythreejs==2.4.2
|
632 |
+
pytoolconfig==1.3.1
|
633 |
+
pytools==2024.1.3
|
634 |
+
pytorch-ignite==0.5.0.post2
|
635 |
+
pytorch-lightning==2.2.5
|
636 |
+
pytz==2023.3.post1
|
637 |
+
pytz==2024.1
|
638 |
+
pyu2f==0.1.5
|
639 |
+
pyviz_comms==3.0.2
|
640 |
+
pyzmq==24.0.1
|
641 |
+
pyzmq==25.1.2
|
642 |
+
qgrid==1.3.1
|
643 |
+
qtconsole==5.5.2
|
644 |
+
quantecon==0.7.2
|
645 |
+
qudida==0.0.4
|
646 |
+
raft-dask==24.4.0
|
647 |
+
rapids-dask-dependency==24.4.1a0
|
648 |
+
rasterio==1.3.10
|
649 |
+
rasterstats==0.19.0
|
650 |
+
ray-cpp==2.9.0
|
651 |
+
ray==2.9.0
|
652 |
+
referencing==0.32.1
|
653 |
+
regex==2023.12.25
|
654 |
+
requests-oauthlib==1.3.1
|
655 |
+
requests-toolbelt==0.10.1
|
656 |
+
requests==2.31.0
|
657 |
+
retrying==1.3.3
|
658 |
+
retrying==1.3.4
|
659 |
+
rfc3339-validator==0.1.4
|
660 |
+
rfc3986-validator==0.1.1
|
661 |
+
rgf-python==3.12.0
|
662 |
+
rich-click==1.8.2
|
663 |
+
rich==13.7.0
|
664 |
+
rich==13.7.1
|
665 |
+
rmm==24.4.0
|
666 |
+
rope==1.13.0
|
667 |
+
rpds-py==0.16.2
|
668 |
+
rsa==4.9
|
669 |
+
ruamel-yaml-conda==0.15.100
|
670 |
+
ruamel.yaml.clib==0.2.7
|
671 |
+
ruamel.yaml==0.18.5
|
672 |
+
s2sphere==0.2.5
|
673 |
+
s3fs==2024.3.1
|
674 |
+
s3transfer==0.6.2
|
675 |
+
safetensors==0.4.3
|
676 |
+
scattertext==0.1.19
|
677 |
+
scikit-image==0.22.0
|
678 |
+
scikit-learn-intelex==2024.4.0
|
679 |
+
scikit-learn==1.2.2
|
680 |
+
scikit-multilearn==0.2.0
|
681 |
+
scikit-optimize==0.10.1
|
682 |
+
scikit-plot==0.3.7
|
683 |
+
scikit-surprise==1.1.4
|
684 |
+
scipy==1.11.4
|
685 |
+
scipy==1.13.1
|
686 |
+
seaborn==0.12.2
|
687 |
+
segment_anything==1.0
|
688 |
+
segregation==2.5
|
689 |
+
semver==3.0.2
|
690 |
+
sentencepiece==0.2.0
|
691 |
+
sentry-sdk==2.3.1
|
692 |
+
setproctitle==1.3.3
|
693 |
+
setuptools-git==1.2
|
694 |
+
setuptools-scm==8.1.0
|
695 |
+
setuptools==69.0.3
|
696 |
+
shap==0.44.1
|
697 |
+
shapely==2.0.4
|
698 |
+
shellingham==1.5.4
|
699 |
+
simpervisor==1.0.0
|
700 |
+
simplejson==3.19.2
|
701 |
+
six==1.16.0
|
702 |
+
sklearn-pandas==2.2.0
|
703 |
+
slicer==0.0.7
|
704 |
+
smart-open==6.4.0
|
705 |
+
smmap==5.0.1
|
706 |
+
sniffio==1.3.0
|
707 |
+
snowballstemmer==2.2.0
|
708 |
+
snuggs==1.4.7
|
709 |
+
sortedcontainers==2.4.0
|
710 |
+
soundfile==0.12.1
|
711 |
+
soupsieve==2.5
|
712 |
+
soxr==0.3.7
|
713 |
+
spacy-legacy==3.0.12
|
714 |
+
spacy-loggers==1.0.5
|
715 |
+
spacy==3.7.3
|
716 |
+
spaghetti==1.7.5.post1
|
717 |
+
spectral==0.23.1
|
718 |
+
spglm==1.1.0
|
719 |
+
sphinx-rtd-theme==0.2.4
|
720 |
+
spint==1.0.7
|
721 |
+
splot==1.1.5.post1
|
722 |
+
spopt==0.6.0
|
723 |
+
spreg==1.4.2
|
724 |
+
spvcm==0.3.0
|
725 |
+
sqlparse==0.4.4
|
726 |
+
squarify==0.4.3
|
727 |
+
srsly==2.4.8
|
728 |
+
stable-baselines3==2.1.0
|
729 |
+
stack-data==0.6.2
|
730 |
+
stack-data==0.6.3
|
731 |
+
stanio==0.5.0
|
732 |
+
starlette==0.32.0.post1
|
733 |
+
statsmodels==0.14.1
|
734 |
+
stemming==1.0.1
|
735 |
+
stop-words==2018.7.23
|
736 |
+
stopit==1.1.2
|
737 |
+
stumpy==1.12.0
|
738 |
+
sympy==1.12
|
739 |
+
tables==3.9.2
|
740 |
+
tabulate==0.9.0
|
741 |
+
tangled-up-in-unicode==0.2.0
|
742 |
+
tbb==2021.12.0
|
743 |
+
tblib==3.0.0
|
744 |
+
tenacity==8.2.3
|
745 |
+
tensorboard-data-server==0.7.2
|
746 |
+
tensorboard-plugin-profile==2.15.0
|
747 |
+
tensorboard==2.15.1
|
748 |
+
tensorboardX==2.6.2.2
|
749 |
+
tensorflow-cloud==0.1.16
|
750 |
+
tensorflow-datasets==4.9.4
|
751 |
+
tensorflow-decision-forests==1.8.1
|
752 |
+
tensorflow-estimator==2.15.0
|
753 |
+
tensorflow-hub==0.16.1
|
754 |
+
tensorflow-io-gcs-filesystem==0.35.0
|
755 |
+
tensorflow-io==0.35.0
|
756 |
+
tensorflow-metadata==0.14.0
|
757 |
+
tensorflow-probability==0.23.0
|
758 |
+
tensorflow-serving-api==2.14.1
|
759 |
+
tensorflow-text==2.15.0
|
760 |
+
tensorflow-transform==0.14.0
|
761 |
+
tensorflow==2.15.0
|
762 |
+
tensorstore==0.1.59
|
763 |
+
termcolor==2.4.0
|
764 |
+
terminado==0.18.0
|
765 |
+
testpath==0.6.0
|
766 |
+
text-unidecode==1.3
|
767 |
+
textblob==0.18.0.post0
|
768 |
+
texttable==1.7.0
|
769 |
+
tf_keras==2.15.1
|
770 |
+
tfp-nightly==0.24.0.dev0
|
771 |
+
thinc==8.2.3
|
772 |
+
threadpoolctl==3.2.0
|
773 |
+
tifffile==2023.12.9
|
774 |
+
timm==1.0.3
|
775 |
+
tinycss2==1.2.1
|
776 |
+
tobler==0.11.2
|
777 |
+
tokenizers==0.19.1
|
778 |
+
toml==0.10.2
|
779 |
+
tomli==2.0.1
|
780 |
+
tomlkit==0.12.5
|
781 |
+
toolz==0.12.1
|
782 |
+
torch==2.1.2
|
783 |
+
torchaudio==2.1.2
|
784 |
+
torchdata==0.7.1
|
785 |
+
torchinfo==1.8.0
|
786 |
+
torchmetrics==1.4.0.post0
|
787 |
+
torchtext==0.16.2
|
788 |
+
torchvision==0.16.2
|
789 |
+
tornado==6.3.3
|
790 |
+
tqdm==4.66.4
|
791 |
+
traceml==1.0.8
|
792 |
+
traitlets==5.9.0
|
793 |
+
traittypes==0.2.1
|
794 |
+
transformers==4.41.1
|
795 |
+
treelite==4.1.2
|
796 |
+
truststore==0.8.0
|
797 |
+
trx-python==0.2.9
|
798 |
+
tsfresh==0.20.2
|
799 |
+
typeguard==4.1.5
|
800 |
+
typer==0.9.0
|
801 |
+
typer==0.9.4
|
802 |
+
types-python-dateutil==2.8.19.20240106
|
803 |
+
typing-inspect==0.9.0
|
804 |
+
typing-utils==0.1.0
|
805 |
+
typing_extensions==4.9.0
|
806 |
+
tzdata==2023.4
|
807 |
+
tzdata==2024.1
|
808 |
+
uc-micro-py==1.0.3
|
809 |
+
ucx-py==0.37.0
|
810 |
+
ujson==5.10.0
|
811 |
+
umap-learn==0.5.6
|
812 |
+
unicodedata2==15.1.0
|
813 |
+
update-checker==0.18.0
|
814 |
+
uri-template==1.3.0
|
815 |
+
uritemplate==3.0.1
|
816 |
+
urllib3==1.26.18
|
817 |
+
urllib3==2.1.0
|
818 |
+
urwid==2.6.12
|
819 |
+
urwid_readline==0.14
|
820 |
+
uvicorn==0.25.0
|
821 |
+
uvloop==0.19.0
|
822 |
+
vaex-astro==0.9.3
|
823 |
+
vaex-core==4.17.1
|
824 |
+
vaex-hdf5==0.14.1
|
825 |
+
vaex-jupyter==0.8.2
|
826 |
+
vaex-ml==0.18.3
|
827 |
+
vaex-server==0.9.0
|
828 |
+
vaex-viz==0.5.4
|
829 |
+
vaex==4.17.0
|
830 |
+
vec_noise==1.1.4
|
831 |
+
vecstack==0.4.0
|
832 |
+
virtualenv==20.21.0
|
833 |
+
visions==0.7.5
|
834 |
+
vowpalwabbit==9.9.0
|
835 |
+
vtk==9.3.0
|
836 |
+
wandb==0.17.0
|
837 |
+
wasabi==1.1.2
|
838 |
+
watchfiles==0.21.0
|
839 |
+
wavio==0.0.9
|
840 |
+
wcwidth==0.2.13
|
841 |
+
weasel==0.3.4
|
842 |
+
webcolors==1.13
|
843 |
+
webencodings==0.5.1
|
844 |
+
websocket-client==1.7.0
|
845 |
+
websockets==12.0
|
846 |
+
wfdb==4.1.2
|
847 |
+
whatthepatch==1.0.5
|
848 |
+
wheel==0.42.0
|
849 |
+
widgetsnbextension==3.6.6
|
850 |
+
witwidget==1.8.1
|
851 |
+
woodwork==0.31.0
|
852 |
+
wordcloud==1.9.3
|
853 |
+
wordsegment==1.3.1
|
854 |
+
wrapt==1.14.1
|
855 |
+
xarray-einstats==0.7.0
|
856 |
+
xarray==2024.5.0
|
857 |
+
xgboost==2.0.3
|
858 |
+
xvfbwrapper==0.2.9
|
859 |
+
xxhash==3.4.1
|
860 |
+
xyzservices==2024.4.0
|
861 |
+
y-py==0.6.2
|
862 |
+
yapf==0.40.2
|
863 |
+
yarl==1.9.3
|
864 |
+
yarl==1.9.4
|
865 |
+
ydata-profiling==4.6.4
|
866 |
+
yellowbrick==1.5
|
867 |
+
ypy-websocket==0.8.4
|
868 |
+
zict==3.0.0
|
869 |
+
zipp==3.17.0
|
870 |
+
zstandard==0.19.0
|
wandb/run-20240603_175449-d191dh7n/files/wandb-metadata.json
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.15.133+-x86_64-with-glibc2.31",
|
3 |
+
"python": "3.10.13",
|
4 |
+
"heartbeatAt": "2024-06-03T17:54:49.804208",
|
5 |
+
"startedAt": "2024-06-03T17:54:49.031804",
|
6 |
+
"docker": null,
|
7 |
+
"cuda": null,
|
8 |
+
"args": [],
|
9 |
+
"state": "running",
|
10 |
+
"program": "kaggle.ipynb",
|
11 |
+
"codePathLocal": null,
|
12 |
+
"root": "/kaggle/working",
|
13 |
+
"host": "f28ebe0d2526",
|
14 |
+
"username": "root",
|
15 |
+
"executable": "/opt/conda/bin/python3.10",
|
16 |
+
"cpu_count": 2,
|
17 |
+
"cpu_count_logical": 4,
|
18 |
+
"cpu_freq": {
|
19 |
+
"current": 2000.194,
|
20 |
+
"min": 0.0,
|
21 |
+
"max": 0.0
|
22 |
+
},
|
23 |
+
"cpu_freq_per_core": [
|
24 |
+
{
|
25 |
+
"current": 2000.194,
|
26 |
+
"min": 0.0,
|
27 |
+
"max": 0.0
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"current": 2000.194,
|
31 |
+
"min": 0.0,
|
32 |
+
"max": 0.0
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"current": 2000.194,
|
36 |
+
"min": 0.0,
|
37 |
+
"max": 0.0
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"current": 2000.194,
|
41 |
+
"min": 0.0,
|
42 |
+
"max": 0.0
|
43 |
+
}
|
44 |
+
],
|
45 |
+
"disk": {
|
46 |
+
"/": {
|
47 |
+
"total": 8062.387607574463,
|
48 |
+
"used": 5657.45686340332
|
49 |
+
}
|
50 |
+
},
|
51 |
+
"gpu": "Tesla T4",
|
52 |
+
"gpu_count": 2,
|
53 |
+
"gpu_devices": [
|
54 |
+
{
|
55 |
+
"name": "Tesla T4",
|
56 |
+
"memory_total": 16106127360
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"name": "Tesla T4",
|
60 |
+
"memory_total": 16106127360
|
61 |
+
}
|
62 |
+
],
|
63 |
+
"memory": {
|
64 |
+
"total": 31.357563018798828
|
65 |
+
}
|
66 |
+
}
|
wandb/run-20240603_175449-d191dh7n/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"train/loss": 0.6481, "train/grad_norm": 5.41937255859375, "train/learning_rate": 0.00019090909090909092, "train/epoch": 0.9090909090909091, "train/global_step": 10, "_timestamp": 1717448035.9301188, "_runtime": 10746.891048908234, "_step": 28, "train_runtime": 4190.3533, "train_samples_per_second": 0.196, "train_steps_per_second": 0.053, "total_flos": 4.4695391805696e+16, "train_loss": 0.8953418861735951}
|
wandb/run-20240603_175449-d191dh7n/logs/debug-internal.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20240603_175449-d191dh7n/logs/debug.log
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Current SDK version is 0.17.0
|
2 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Configure stats pid to 34
|
3 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
|
4 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from /kaggle/working/wandb/settings
|
5 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
|
7 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
|
8 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying login settings: {}
|
9 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
|
10 |
+
2024-06-03 17:54:49,033 INFO MainThread:34 [wandb_init.py:_log_setup():520] Logging user logs to /kaggle/working/wandb/run-20240603_175449-d191dh7n/logs/debug.log
|
11 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:_log_setup():521] Logging internal logs to /kaggle/working/wandb/run-20240603_175449-d191dh7n/logs/debug-internal.log
|
12 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:_jupyter_setup():466] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x78eae9ee9ab0>
|
13 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():560] calling init triggers
|
14 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():567] wandb.init called with sweep_config: {}
|
15 |
+
config: {}
|
16 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():610] starting backend
|
17 |
+
2024-06-03 17:54:49,034 INFO MainThread:34 [wandb_init.py:init():614] setting up manager
|
18 |
+
2024-06-03 17:54:49,036 INFO MainThread:34 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
19 |
+
2024-06-03 17:54:49,038 INFO MainThread:34 [wandb_init.py:init():622] backend started and connected
|
20 |
+
2024-06-03 17:54:49,052 INFO MainThread:34 [wandb_run.py:_label_probe_notebook():1328] probe notebook
|
21 |
+
2024-06-03 17:54:49,382 INFO MainThread:34 [wandb_init.py:init():711] updated telemetry
|
22 |
+
2024-06-03 17:54:49,386 INFO MainThread:34 [wandb_init.py:init():744] communicating run to backend with 90.0 second timeout
|
23 |
+
2024-06-03 17:54:49,688 INFO MainThread:34 [wandb_run.py:_on_init():2396] communicating current version
|
24 |
+
2024-06-03 17:54:49,771 INFO MainThread:34 [wandb_run.py:_on_init():2405] got version response
|
25 |
+
2024-06-03 17:54:49,772 INFO MainThread:34 [wandb_init.py:init():795] starting run threads in backend
|
26 |
+
2024-06-03 17:55:06,077 INFO MainThread:34 [wandb_run.py:_console_start():2374] atexit reg
|
27 |
+
2024-06-03 17:55:06,077 INFO MainThread:34 [wandb_run.py:_redirect():2229] redirect: wrap_raw
|
28 |
+
2024-06-03 17:55:06,078 INFO MainThread:34 [wandb_run.py:_redirect():2294] Wrapping output streams.
|
29 |
+
2024-06-03 17:55:06,078 INFO MainThread:34 [wandb_run.py:_redirect():2319] Redirects installed.
|
30 |
+
2024-06-03 17:55:06,081 INFO MainThread:34 [wandb_init.py:init():838] run started, returning control to user process
|
31 |
+
2024-06-03 17:55:06,087 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 
'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_17-40-11_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 
'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 
'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
|
32 |
+
2024-06-03 17:55:07,353 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
33 |
+
2024-06-03 17:55:07,353 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
34 |
+
2024-06-03 17:56:56,275 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
35 |
+
2024-06-03 17:56:56,290 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
36 |
+
2024-06-03 17:56:56,290 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
37 |
+
2024-06-03 17:56:59,514 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
38 |
+
2024-06-03 17:56:59,595 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
39 |
+
2024-06-03 17:56:59,595 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
40 |
+
2024-06-03 17:57:06,214 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
41 |
+
2024-06-03 17:57:06,261 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
42 |
+
2024-06-03 17:57:06,261 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
43 |
+
2024-06-03 18:01:57,364 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
44 |
+
2024-06-03 18:01:57,366 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
45 |
+
2024-06-03 18:01:57,366 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
46 |
+
2024-06-03 18:02:16,908 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
47 |
+
2024-06-03 18:02:16,951 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
48 |
+
2024-06-03 18:02:16,952 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
49 |
+
2024-06-03 18:02:46,250 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
50 |
+
2024-06-03 18:02:46,252 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
51 |
+
2024-06-03 18:02:46,252 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
52 |
+
2024-06-03 18:03:47,943 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
53 |
+
2024-06-03 18:03:48,029 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
54 |
+
2024-06-03 18:03:48,029 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
55 |
+
2024-06-03 18:04:13,706 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
56 |
+
2024-06-03 18:04:13,759 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
57 |
+
2024-06-03 18:04:13,759 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
58 |
+
2024-06-03 18:04:26,491 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
59 |
+
2024-06-03 18:04:26,697 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
60 |
+
2024-06-03 18:04:26,697 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
61 |
+
2024-06-03 18:04:34,326 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
62 |
+
2024-06-03 18:04:35,570 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 
'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_17-40-11_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 
'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 
'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
|
63 |
+
2024-06-03 18:18:50,784 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
64 |
+
2024-06-03 18:18:50,784 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
65 |
+
2024-06-03 18:41:05,951 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
66 |
+
2024-06-03 18:41:05,953 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
67 |
+
2024-06-03 18:41:05,953 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
68 |
+
2024-06-03 18:45:28,892 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
69 |
+
2024-06-03 18:45:28,927 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
70 |
+
2024-06-03 18:45:28,927 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
71 |
+
2024-06-03 18:45:30,228 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
72 |
+
2024-06-03 18:45:30,229 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
73 |
+
2024-06-03 18:45:30,229 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
74 |
+
2024-06-03 18:45:31,254 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
75 |
+
2024-06-03 18:45:31,276 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
76 |
+
2024-06-03 18:45:31,276 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
77 |
+
2024-06-03 18:45:33,122 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
78 |
+
2024-06-03 18:45:33,358 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
79 |
+
2024-06-03 18:45:33,358 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
80 |
+
2024-06-03 18:45:36,415 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
81 |
+
2024-06-03 18:45:37,683 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 
'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 20, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_18-45-28_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 
'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 
'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
|
82 |
+
2024-06-03 19:55:43,601 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
83 |
+
2024-06-03 19:55:43,602 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
84 |
+
2024-06-03 19:56:53,516 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
85 |
+
2024-06-03 19:56:55,309 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
86 |
+
2024-06-03 19:56:55,309 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
87 |
+
2024-06-03 20:02:21,391 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
88 |
+
2024-06-03 20:02:22,164 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
89 |
+
2024-06-03 20:02:22,164 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
90 |
+
2024-06-03 20:03:12,802 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
91 |
+
2024-06-03 20:03:12,827 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
92 |
+
2024-06-03 20:03:12,827 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
93 |
+
2024-06-03 20:03:22,908 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
94 |
+
2024-06-03 20:03:23,545 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
95 |
+
2024-06-03 20:03:23,546 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
96 |
+
2024-06-03 20:04:16,404 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
97 |
+
2024-06-03 20:04:16,447 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
98 |
+
2024-06-03 20:04:16,447 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
99 |
+
2024-06-03 20:04:32,978 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
100 |
+
2024-06-03 20:04:33,028 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
101 |
+
2024-06-03 20:04:33,028 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
102 |
+
2024-06-03 20:05:18,072 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
103 |
+
2024-06-03 20:05:18,118 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
104 |
+
2024-06-03 20:05:18,118 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
105 |
+
2024-06-03 20:05:31,531 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
106 |
+
2024-06-03 20:05:31,580 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
107 |
+
2024-06-03 20:05:31,580 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
108 |
+
2024-06-03 20:05:44,101 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
109 |
+
2024-06-03 20:05:44,780 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
110 |
+
2024-06-03 20:05:44,780 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
111 |
+
2024-06-03 20:06:37,084 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
112 |
+
2024-06-03 20:06:37,830 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
113 |
+
2024-06-03 20:06:37,830 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
114 |
+
2024-06-03 20:08:59,975 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
115 |
+
2024-06-03 20:09:00,010 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
116 |
+
2024-06-03 20:09:00,010 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
117 |
+
2024-06-03 20:09:06,499 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
118 |
+
2024-06-03 20:09:06,500 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
119 |
+
2024-06-03 20:09:06,500 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
120 |
+
2024-06-03 20:09:07,197 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
121 |
+
2024-06-03 20:09:07,218 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
122 |
+
2024-06-03 20:09:07,218 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
123 |
+
2024-06-03 20:09:18,369 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
124 |
+
2024-06-03 20:09:19,119 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
125 |
+
2024-06-03 20:09:19,120 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
126 |
+
2024-06-03 20:11:52,561 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
127 |
+
2024-06-03 20:11:54,589 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
128 |
+
2024-06-03 20:11:54,589 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
129 |
+
2024-06-03 20:16:50,594 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
130 |
+
2024-06-03 20:19:04,529 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
131 |
+
2024-06-03 20:19:04,530 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
132 |
+
2024-06-03 20:20:33,194 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
133 |
+
2024-06-03 20:20:33,197 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
134 |
+
2024-06-03 20:20:33,197 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
135 |
+
2024-06-03 20:20:53,790 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
136 |
+
2024-06-03 20:24:20,236 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
137 |
+
2024-06-03 20:24:20,236 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
138 |
+
2024-06-03 20:32:45,840 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
139 |
+
2024-06-03 20:32:45,841 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
140 |
+
2024-06-03 20:32:45,841 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
141 |
+
2024-06-03 20:34:19,718 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
142 |
+
2024-06-03 20:34:19,722 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
143 |
+
2024-06-03 20:34:19,722 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
144 |
+
2024-06-03 20:34:21,601 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
145 |
+
2024-06-03 20:34:21,602 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
146 |
+
2024-06-03 20:34:21,602 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
147 |
+
2024-06-03 20:34:23,187 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
148 |
+
2024-06-03 20:37:48,397 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
149 |
+
2024-06-03 20:37:48,397 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
150 |
+
2024-06-03 20:38:33,502 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
151 |
+
2024-06-03 20:41:58,862 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
152 |
+
2024-06-03 20:41:58,862 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
153 |
+
2024-06-03 20:43:51,168 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
154 |
+
2024-06-03 20:43:51,171 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
155 |
+
2024-06-03 20:43:51,171 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
156 |
+
2024-06-03 20:43:53,895 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
157 |
+
2024-06-03 20:47:25,895 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
158 |
+
2024-06-03 20:47:25,895 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
159 |
+
2024-06-03 20:50:07,262 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
160 |
+
2024-06-03 20:50:07,303 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
161 |
+
2024-06-03 20:50:07,303 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
162 |
+
2024-06-03 20:50:09,915 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
163 |
+
2024-06-03 20:50:09,916 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
164 |
+
2024-06-03 20:50:09,917 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
165 |
+
2024-06-03 20:50:10,463 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
166 |
+
2024-06-03 20:50:10,484 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
167 |
+
2024-06-03 20:50:10,484 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
168 |
+
2024-06-03 20:50:13,975 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
169 |
+
2024-06-03 20:50:14,119 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
170 |
+
2024-06-03 20:50:14,119 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
|
171 |
+
2024-06-03 20:50:15,412 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
|
172 |
+
2024-06-03 20:50:16,872 INFO MainThread:34 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'tiiuae/falcon-7b', 'transformers_version': '4.41.1', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 
'tiiuae/falcon-7b--configuration_falcon.FalconConfig', 'AutoModel': 'tiiuae/falcon-7b--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 20, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Jun03_20-50-07_f28ebe0d2526', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 4, 'save_safetensors': True, 
'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'othmanfa/fsttModel', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 
'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': True, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False}
|
wandb/run-20240603_175449-d191dh7n/run-d191dh7n.wandb
ADDED
Binary file (278 kB). View file
|
|