umuthopeyildirim committed
Commit d4cabcc
1 Parent(s): 39356bf

Upload folder using huggingface_hub
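
The commit message above says the folder was pushed with huggingface_hub's folder-upload helper. A minimal sketch of the equivalent call, assuming a logged-in environment; the repo id and local path are placeholders, not taken from this page:

from huggingface_hub import HfApi

api = HfApi()  # assumes credentials from `huggingface-cli login`
api.upload_folder(
    folder_path="out/mod-rwkv3b",            # hypothetical: the proj_dir used in the runs below
    repo_id="umuthopeyildirim/<repo-name>",  # placeholder repo id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)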

.ipynb_checkpoints/train_log-checkpoint.txt ADDED
@@ -0,0 +1,159 @@
+ [Jupyter auto-checkpoint copy of train_log.txt; content identical to the train_log.txt diff below except that it ends three rows earlier, at epoch 17.]
rwkv-0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:edc0525077e550138a9e5ff86254fbcb6f97cd983b1db4be67efe34efcc07b4a
+ size 6738530782
rwkv-10.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de321d4671fef5c9cb8e03576006f43a0ebbe9f03ddeb8c82909ea8848869d04
+ size 6738531178
rwkv-20.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:246a22cfd53b6ab4a5042d603f517d5cdb7a00bdff4c140db533502ff52df2fe
+ size 6738531178
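
Each rwkv-*.pth entry above is a Git LFS pointer rather than the weights themselves: the oid line records the SHA-256 of the actual ~6.7 GB checkpoint. A short sketch for verifying a downloaded checkpoint against its pointer (file name and digest taken from the rwkv-0.pth pointer above):

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so the ~6.7 GB file never sits in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "edc0525077e550138a9e5ff86254fbcb6f97cd983b1db4be67efe34efcc07b4a"
assert sha256_of("rwkv-0.pth") == expected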
train_log.txt ADDED
@@ -0,0 +1,162 @@
+ NEW RUN 2024-03-22-12-52-51
+ {'load_model': '', 'model_path': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_visual.pth', 'wandb': '', 'proj_dir': 'out/mod-rwkv3b', 'random_seed': -1, 'data_file': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/lspd_instruct_300k.json', 'data_type': 'json', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 1000, 'epoch_count': 84, 'epoch_begin': 0, 'epoch_save': 10, 'micro_bsz': 1, 'n_layer': 32, 'n_embd': 2560, 'dim_att': 2560, 'dim_ffn': 8960, 'pre_ffn': 0, 'head_size_a': 64, 'head_size_divisor': 8, 'lr_init': 2e-05, 'lr_final': 2e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'ds_bucket_mb': 200, 'vision_tower_name': 'openai/clip-vit-large-patch14-336', 'image_folder': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/images/', 'grid_size': -1, 'detail': 'low', 'freeze_rwkv': 0, 'freeze_proj': 0, 'image_position': 'first', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '8', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 16, 'max_epochs': 84, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-03-22-12-52-51', 'betas': (0.9, 0.99), 'real_bsz': 8, 'run_name': '65536 ctx1024 L32 D2560', 'tokenizer': <src.rwkv_tokenizer.TRIE_TOKENIZER object at 0x7faccf2a0e20>, 'image_processor': CLIPImageProcessor {
+   "_valid_processor_keys": [
+     "images",
+     "do_resize",
+     "size",
+     "resample",
+     "do_center_crop",
+     "crop_size",
+     "do_rescale",
+     "rescale_factor",
+     "do_normalize",
+     "image_mean",
+     "image_std",
+     "do_convert_rgb",
+     "return_tensors",
+     "data_format",
+     "input_data_format"
+   ],
+   "crop_size": {
+     "height": 336,
+     "width": 336
+   },
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "CLIPImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "shortest_edge": 336
+   }
+ }
+ }
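
The CLIPImageProcessor block above is the stock preprocessing for openai/clip-vit-large-patch14-336: resize so the shortest edge is 336, center-crop to 336x336, rescale by 1/255, and normalize with the CLIP mean/std (resample=3 is bicubic). A sketch of reproducing the same pipeline with transformers; the image path is a placeholder:

from PIL import Image
from transformers import CLIPImageProcessor

# Loads the exact settings printed above from the hub config.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")

image = Image.open("example.jpg")  # placeholder image
pixel_values = processor(images=image, return_tensors="pt").pixel_values
print(pixel_values.shape)  # torch.Size([1, 3, 336, 336])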
+ {'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 16, 'train_micro_batch_size_per_gpu': 1, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
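
Note how the batch-size fields fit together: micro_bsz=1 per GPU across devices='8' gives the logged real_bsz=8 sequences per forward pass, and with accumulate_grad_batches=16 (mirrored by gradient_accumulation_steps in the DeepSpeed dict) each optimizer step consumes 128 sequences, i.e. at most 128 x 1024 context tokens. A quick check of that arithmetic:

micro_bsz = 1     # 'micro_bsz' / 'train_micro_batch_size_per_gpu'
devices = 8       # 'devices': '8'
grad_accum = 16   # 'accumulate_grad_batches' / 'gradient_accumulation_steps'
ctx_len = 1024    # 'ctx_len'

real_bsz = micro_bsz * devices                 # 8, matches 'real_bsz' above
seqs_per_step = real_bsz * grad_accum          # 128 sequences per optimizer step
max_tokens_per_step = seqs_per_step * ctx_len  # 131072 tokens, an upper bound
print(real_bsz, seqs_per_step, max_tokens_per_step)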
+ NEW RUN 2024-03-22-12-59-25
+ {'load_model': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_rwkv.pth', 'model_path': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_visual.pth', 'wandb': '', 'proj_dir': 'out/mod-rwkv3b', 'random_seed': -1, 'data_file': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/lspd_instruct_300k.json', 'data_type': 'json', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 1000, 'epoch_count': 84, 'epoch_begin': 0, 'epoch_save': 10, 'micro_bsz': 1, 'n_layer': 32, 'n_embd': 2560, 'dim_att': 2560, 'dim_ffn': 8960, 'pre_ffn': 0, 'head_size_a': 64, 'head_size_divisor': 8, 'lr_init': 2e-05, 'lr_final': 2e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'ds_bucket_mb': 200, 'vision_tower_name': 'openai/clip-vit-large-patch14-336', 'image_folder': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/images/', 'grid_size': -1, 'detail': 'low', 'freeze_rwkv': 0, 'freeze_proj': 0, 'image_position': 'first', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '8', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 16, 'max_epochs': 84, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-03-22-12-59-25', 'betas': (0.9, 0.99), 'real_bsz': 8, 'run_name': '65536 ctx1024 L32 D2560', 'tokenizer': <src.rwkv_tokenizer.TRIE_TOKENIZER object at 0x7f489dba0d60>, 'image_processor': CLIPImageProcessor {
+   "_valid_processor_keys": [
+     "images",
+     "do_resize",
+     "size",
+     "resample",
+     "do_center_crop",
+     "crop_size",
+     "do_rescale",
+     "rescale_factor",
+     "do_normalize",
+     "image_mean",
+     "image_std",
+     "do_convert_rgb",
+     "return_tensors",
+     "data_format",
+     "input_data_format"
+   ],
+   "crop_size": {
+     "height": 336,
+     "width": 336
+   },
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "CLIPImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "shortest_edge": 336
+   }
+ }
+ }
+ {'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 16, 'train_micro_batch_size_per_gpu': 1, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
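
In the args, strategy='deepspeed_stage_1' is PyTorch Lightning shorthand that expands to a ZeRO stage-1 setup like the dict printed above (optimizer-state sharding only, 200 MB buckets from ds_bucket_mb=200, bf16, gradient clipping 1.0). A hedged sketch of spelling the same thing out explicitly; the Trainer call is illustrative, not this repo's actual entry point:

import pytorch_lightning as pl
from pytorch_lightning.strategies import DeepSpeedStrategy

# ZeRO stage 1 with the bucket sizes from the logged DeepSpeed dict.
strategy = DeepSpeedStrategy(
    stage=1,
    allgather_bucket_size=200_000_000,
    reduce_bucket_size=200_000_000,
)

trainer = pl.Trainer(
    accelerator="gpu",
    devices=8,
    precision="bf16",
    strategy=strategy,
    accumulate_grad_batches=16,
    gradient_clip_val=1.0,
)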
+ NEW RUN 2024-03-22-13-07-04
+ {'load_model': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_rwkv.pth', 'model_path': '/workspace/VisualRWKV/models/rwkv3b-vitl336p14-577token_mix665k_8gpu_visual.pth', 'wandb': 'mod-visionrwkv', 'proj_dir': 'out/mod-rwkv3b', 'random_seed': -1, 'data_file': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/lspd_instruct_300k.json', 'data_type': 'json', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 1000, 'epoch_count': 84, 'epoch_begin': 0, 'epoch_save': 10, 'micro_bsz': 1, 'n_layer': 32, 'n_embd': 2560, 'dim_att': 2560, 'dim_ffn': 8960, 'pre_ffn': 0, 'head_size_a': 64, 'head_size_divisor': 8, 'lr_init': 2e-05, 'lr_final': 2e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'ds_bucket_mb': 200, 'vision_tower_name': 'openai/clip-vit-large-patch14-336', 'image_folder': '/workspace/VisualRWKV/dataset/mod-rwkv-paper-image/images/', 'grid_size': -1, 'detail': 'low', 'freeze_rwkv': 0, 'freeze_proj': 0, 'image_position': 'first', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '8', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 16, 'max_epochs': 84, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-03-22-13-07-04', 'betas': (0.9, 0.99), 'real_bsz': 8, 'run_name': '65536 ctx1024 L32 D2560', 'tokenizer': <src.rwkv_tokenizer.TRIE_TOKENIZER object at 0x7ff394498ca0>, 'image_processor': CLIPImageProcessor {
+   "_valid_processor_keys": [
+     "images",
+     "do_resize",
+     "size",
+     "resample",
+     "do_center_crop",
+     "crop_size",
+     "do_rescale",
+     "rescale_factor",
+     "do_normalize",
+     "image_mean",
+     "image_std",
+     "do_convert_rgb",
+     "return_tensors",
+     "data_format",
+     "input_data_format"
+   ],
+   "crop_size": {
+     "height": 336,
+     "width": 336
+   },
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "CLIPImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "shortest_edge": 336
+   }
+ }
+ }
+ {'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 16, 'train_micro_batch_size_per_gpu': 1, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
+ 0 0.746040 2.1086 0.00002000 2024-03-22 13:19:43.537517 0
+ 1 0.548254 1.7302 0.00002000 2024-03-22 13:26:36.346264 1
+ 2 0.512289 1.6691 0.00002000 2024-03-22 13:33:34.708854 2
+ 3 0.488853 1.6304 0.00002000 2024-03-22 13:40:27.546320 3
+ 4 0.473739 1.6060 0.00002000 2024-03-22 13:47:24.732973 4
+ 5 0.462538 1.5881 0.00002000 2024-03-22 13:56:23.787183 5
+ 6 0.452319 1.5720 0.00002000 2024-03-22 14:14:45.871084 6
+ 7 0.445833 1.5618 0.00002000 2024-03-22 14:28:19.761131 7
+ 8 0.442095 1.5560 0.00002000 2024-03-22 14:35:13.433437 8
+ 9 0.438205 1.5499 0.00002000 2024-03-22 14:42:05.641207 9
+ 10 0.430697 1.5383 0.00002000 2024-03-22 14:49:09.052478 10
+ 11 0.427149 1.5329 0.00002000 2024-03-22 14:56:00.841882 11
+ 12 0.422073 1.5251 0.00002000 2024-03-22 15:02:52.957633 12
+ 13 0.419680 1.5215 0.00002000 2024-03-22 15:09:46.643417 13
+ 14 0.415501 1.5151 0.00002000 2024-03-22 15:16:39.635809 14
+ 15 0.414843 1.5141 0.00002000 2024-03-22 15:23:55.234766 15
+ 16 0.407996 1.5038 0.00002000 2024-03-22 15:37:45.479784 16
+ 17 0.408106 1.5040 0.00002000 2024-03-22 15:47:39.039546 17
+ 18 0.406108 1.5010 0.00002000 2024-03-22 15:54:31.434503 18
+ 19 0.404124 1.4980 0.00002000 2024-03-22 16:01:23.939296 19
+ 20 0.401706 1.4944 0.00002000 2024-03-22 16:08:29.167253 20
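
The trailing rows appear to be per-epoch summaries: epoch index, mean token loss, exp(loss), learning rate, wall-clock timestamp, and the epoch index repeated. The third column is simply the exponential of the second, i.e. token-level perplexity, as a one-line check confirms:

import math
print(round(math.exp(0.746040), 4))  # 2.1086, matching row 0 above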