umuthopeyildirim committed
Commit 6495049
1 Parent(s): d4cabcc
Delete .ipynb_checkpoints
.ipynb_checkpoints/train_log-checkpoint.txt
DELETED
@@ -1,159 +0,0 @@
-NEW RUN 2024-03-22-12-52-51
-{'load_model': '', 'model_path': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_visual.pth', 'wandb': '', 'proj_dir': 'out/mod-rwkv3b', 'random_seed': -1, 'data_file': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/lspd_instruct_300k.json', 'data_type': 'json', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 1000, 'epoch_count': 84, 'epoch_begin': 0, 'epoch_save': 10, 'micro_bsz': 1, 'n_layer': 32, 'n_embd': 2560, 'dim_att': 2560, 'dim_ffn': 8960, 'pre_ffn': 0, 'head_size_a': 64, 'head_size_divisor': 8, 'lr_init': 2e-05, 'lr_final': 2e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'ds_bucket_mb': 200, 'vision_tower_name': 'openai/clip-vit-large-patch14-336', 'image_folder': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/images/', 'grid_size': -1, 'detail': 'low', 'freeze_rwkv': 0, 'freeze_proj': 0, 'image_position': 'first', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '8', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 16, 'max_epochs': 84, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-03-22-12-52-51', 'betas': (0.9, 0.99), 'real_bsz': 8, 'run_name': '65536 ctx1024 L32 D2560', 'tokenizer': <src.rwkv_tokenizer.TRIE_TOKENIZER object at 0x7faccf2a0e20>, 'image_processor': CLIPImageProcessor {
-  "_valid_processor_keys": [
-    "images",
-    "do_resize",
-    "size",
-    "resample",
-    "do_center_crop",
-    "crop_size",
-    "do_rescale",
-    "rescale_factor",
-    "do_normalize",
-    "image_mean",
-    "image_std",
-    "do_convert_rgb",
-    "return_tensors",
-    "data_format",
-    "input_data_format"
-  ],
-  "crop_size": {
-    "height": 336,
-    "width": 336
-  },
-  "do_center_crop": true,
-  "do_convert_rgb": true,
-  "do_normalize": true,
-  "do_rescale": true,
-  "do_resize": true,
-  "image_mean": [
-    0.48145466,
-    0.4578275,
-    0.40821073
-  ],
-  "image_processor_type": "CLIPImageProcessor",
-  "image_std": [
-    0.26862954,
-    0.26130258,
-    0.27577711
-  ],
-  "resample": 3,
-  "rescale_factor": 0.00392156862745098,
-  "size": {
-    "shortest_edge": 336
-  }
-}
-}
-{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 16, 'train_micro_batch_size_per_gpu': 1, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
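The image-processor dump above appears to match the stock configuration of the hub checkpoint logged as vision_tower_name (336x336 center crop, CLIP mean/std, resample 3 i.e. bicubic). A minimal sketch, assuming the standard transformers API, of recreating the same processor:

```python
from transformers import CLIPImageProcessor

# Hedged sketch: re-create the processor whose settings are dumped in the log above.
# The values appear to be the defaults of the checkpoint named as vision_tower_name.
image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")
print(image_processor.crop_size)   # {'height': 336, 'width': 336}
print(image_processor.image_mean)  # [0.48145466, 0.4578275, 0.40821073]
```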
-NEW RUN 2024-03-22-12-59-25
-{'load_model': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_rwkv.pth', 'model_path': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_visual.pth', 'wandb': '', 'proj_dir': 'out/mod-rwkv3b', 'random_seed': -1, 'data_file': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/lspd_instruct_300k.json', 'data_type': 'json', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 1000, 'epoch_count': 84, 'epoch_begin': 0, 'epoch_save': 10, 'micro_bsz': 1, 'n_layer': 32, 'n_embd': 2560, 'dim_att': 2560, 'dim_ffn': 8960, 'pre_ffn': 0, 'head_size_a': 64, 'head_size_divisor': 8, 'lr_init': 2e-05, 'lr_final': 2e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'ds_bucket_mb': 200, 'vision_tower_name': 'openai/clip-vit-large-patch14-336', 'image_folder': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/images/', 'grid_size': -1, 'detail': 'low', 'freeze_rwkv': 0, 'freeze_proj': 0, 'image_position': 'first', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '8', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 16, 'max_epochs': 84, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-03-22-12-59-25', 'betas': (0.9, 0.99), 'real_bsz': 8, 'run_name': '65536 ctx1024 L32 D2560', 'tokenizer': <src.rwkv_tokenizer.TRIE_TOKENIZER object at 0x7f489dba0d60>, 'image_processor': CLIPImageProcessor {
-  "_valid_processor_keys": [
-    "images",
-    "do_resize",
-    "size",
-    "resample",
-    "do_center_crop",
-    "crop_size",
-    "do_rescale",
-    "rescale_factor",
-    "do_normalize",
-    "image_mean",
-    "image_std",
-    "do_convert_rgb",
-    "return_tensors",
-    "data_format",
-    "input_data_format"
-  ],
-  "crop_size": {
-    "height": 336,
-    "width": 336
-  },
-  "do_center_crop": true,
-  "do_convert_rgb": true,
-  "do_normalize": true,
-  "do_rescale": true,
-  "do_resize": true,
-  "image_mean": [
-    0.48145466,
-    0.4578275,
-    0.40821073
-  ],
-  "image_processor_type": "CLIPImageProcessor",
-  "image_std": [
-    0.26862954,
-    0.26130258,
-    0.27577711
-  ],
-  "resample": 3,
-  "rescale_factor": 0.00392156862745098,
-  "size": {
-    "shortest_edge": 336
-  }
-}
-}
-{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 16, 'train_micro_batch_size_per_gpu': 1, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
-NEW RUN 2024-03-22-13-07-04
-{'load_model': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_rwkv.pth', 'model_path': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_visual.pth', 'wandb': 'mod-visionrwkv', 'proj_dir': 'out/mod-rwkv3b', 'random_seed': -1, 'data_file': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/lspd_instruct_300k.json', 'data_type': 'json', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 1000, 'epoch_count': 84, 'epoch_begin': 0, 'epoch_save': 10, 'micro_bsz': 1, 'n_layer': 32, 'n_embd': 2560, 'dim_att': 2560, 'dim_ffn': 8960, 'pre_ffn': 0, 'head_size_a': 64, 'head_size_divisor': 8, 'lr_init': 2e-05, 'lr_final': 2e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'ds_bucket_mb': 200, 'vision_tower_name': 'openai/clip-vit-large-patch14-336', 'image_folder': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/images/', 'grid_size': -1, 'detail': 'low', 'freeze_rwkv': 0, 'freeze_proj': 0, 'image_position': 'first', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '8', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 16, 'max_epochs': 84, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-03-22-13-07-04', 'betas': (0.9, 0.99), 'real_bsz': 8, 'run_name': '65536 ctx1024 L32 D2560', 'tokenizer': <src.rwkv_tokenizer.TRIE_TOKENIZER object at 0x7ff394498ca0>, 'image_processor': CLIPImageProcessor {
-  "_valid_processor_keys": [
-    "images",
-    "do_resize",
-    "size",
-    "resample",
-    "do_center_crop",
-    "crop_size",
-    "do_rescale",
-    "rescale_factor",
-    "do_normalize",
-    "image_mean",
-    "image_std",
-    "do_convert_rgb",
-    "return_tensors",
-    "data_format",
-    "input_data_format"
-  ],
-  "crop_size": {
-    "height": 336,
-    "width": 336
-  },
-  "do_center_crop": true,
-  "do_convert_rgb": true,
-  "do_normalize": true,
-  "do_rescale": true,
-  "do_resize": true,
-  "image_mean": [
-    0.48145466,
-    0.4578275,
-    0.40821073
-  ],
-  "image_processor_type": "CLIPImageProcessor",
-  "image_std": [
-    0.26862954,
-    0.26130258,
-    0.27577711
-  ],
-  "resample": 3,
-  "rescale_factor": 0.00392156862745098,
-  "size": {
-    "shortest_edge": 336
-  }
-}
-}
-{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 16, 'train_micro_batch_size_per_gpu': 1, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
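The three runs above share the same Trainer and DeepSpeed settings; only load_model, wandb, and the timestamp differ. A minimal sketch, not the repository's actual launch code, of how the logged flags map onto a pytorch_lightning (1.x) Trainer; with these values the effective batch size is micro_bsz 1 x 8 GPUs x 16 accumulation steps = 128 samples per optimizer step:

```python
import pytorch_lightning as pl

# Hedged sketch of the logged Trainer flags (not the repo's train.py).
# DeepSpeed ZeRO stage 1 with bf16, matching the config dict printed above;
# effective batch = micro_bsz(1) * devices(8) * accumulate_grad_batches(16) = 128.
trainer = pl.Trainer(
    accelerator="gpu",
    devices=8,
    strategy="deepspeed_stage_1",
    precision="bf16",
    accumulate_grad_batches=16,
    gradient_clip_val=1.0,
    max_epochs=84,
    num_sanity_val_steps=0,
    enable_checkpointing=False,
    logger=False,
)
```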
-0 0.746040 2.1086 0.00002000 2024-03-22 13:19:43.537517 0
-1 0.548254 1.7302 0.00002000 2024-03-22 13:26:36.346264 1
-2 0.512289 1.6691 0.00002000 2024-03-22 13:33:34.708854 2
-3 0.488853 1.6304 0.00002000 2024-03-22 13:40:27.546320 3
-4 0.473739 1.6060 0.00002000 2024-03-22 13:47:24.732973 4
-5 0.462538 1.5881 0.00002000 2024-03-22 13:56:23.787183 5
-6 0.452319 1.5720 0.00002000 2024-03-22 14:14:45.871084 6
-7 0.445833 1.5618 0.00002000 2024-03-22 14:28:19.761131 7
-8 0.442095 1.5560 0.00002000 2024-03-22 14:35:13.433437 8
-9 0.438205 1.5499 0.00002000 2024-03-22 14:42:05.641207 9
-10 0.430697 1.5383 0.00002000 2024-03-22 14:49:09.052478 10
-11 0.427149 1.5329 0.00002000 2024-03-22 14:56:00.841882 11
-12 0.422073 1.5251 0.00002000 2024-03-22 15:02:52.957633 12
-13 0.419680 1.5215 0.00002000 2024-03-22 15:09:46.643417 13
-14 0.415501 1.5151 0.00002000 2024-03-22 15:16:39.635809 14
-15 0.414843 1.5141 0.00002000 2024-03-22 15:23:55.234766 15
-16 0.407996 1.5038 0.00002000 2024-03-22 15:37:45.479784 16
-17 0.408106 1.5040 0.00002000 2024-03-22 15:47:39.039546 17
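Reading the table: the columns appear to be epoch index, mean training loss, exp(loss), learning rate, wall-clock timestamp, and the epoch index again; loss drops from 0.746 to about 0.408 over the 18 logged epochs at a fixed 2e-5 learning rate. A quick check of the assumed loss/exp(loss) relationship:

```python
import math

# Sanity check on the assumed log format: third column = exp(second column).
print(round(math.exp(0.746040), 4))  # 2.1086 (matches the first logged row)
print(round(math.exp(0.408106), 4))  # 1.504  (matches the 1.5040 in the last row)
```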