shikunl committed on
Commit
59567a9
1 Parent(s): 361ea77

Add configs

Browse files
prismer/configs/caption.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ coco:
2
+ dataset: 'coco'
3
+ data_path: '/workspace_dataset/dataset_vqa'
4
+ label_path: '/workspace_dataset/dataset_experts'
5
+ experts: ['depth', 'normal', 'seg_coco', 'edge', 'obj_detection', 'ocr_detection'] # 'none' for PrismerZ
6
+ image_resolution: 480
7
+ prismer_model: 'prismer_base' # 'prismer_large' for Prismer(Z)-Large
8
+ freeze: 'freeze_vision'
9
+
10
+ batch_size_train: 4 # for 8 * 8 nodes [effective batch-size: 256]
11
+ batch_size_test: 8
12
+ init_lr: 5e-5
13
+ weight_decay: 0.05
14
+ min_lr: 0
15
+ max_epoch: 3
16
+
17
+ prefix: 'A picture of' # use this prefix for fine-tuning, or an empty prefix '' for zero-shot experiments
18
+
19
+ nocaps:
20
+ dataset: 'nocaps'
21
+ data_path: '/workspace_dataset/dataset_vqa'
22
+ label_path: '/workspace_dataset/dataset_experts'
23
+ experts: ['depth', 'normal', 'seg_coco', 'edge', 'obj_detection', 'ocr_detection'] # 'none' for PrismerZ
24
+
25
+ image_resolution: 480
26
+ prismer_model: 'prismer_base' # 'prismer_large' for Prismer(Z)-Large
27
+ freeze: 'freeze_vision'
28
+
29
+ batch_size_train: 4 # for 8 * 8 nodes [effective batch-size: 256]
30
+ batch_size_test: 8
31
+ init_lr: 5e-5
32
+ weight_decay: 0.05
33
+ min_lr: 0
34
+ max_epoch: 3
35
+
36
+ prefix: 'A picture of' # use this prefix for fine-tuning, or an empty prefix '' for zero-shot experiments
37
+
38
+ demo:
39
+ dataset: 'demo'
40
+ data_path: 'helpers'
41
+ label_path: 'helpers/labels'
42
+ experts: ['depth', 'normal', 'seg_coco', 'edge', 'obj_detection', 'ocr_detection'] # 'none' for PrismerZ
43
+
44
+ image_resolution: 480
45
+ prismer_model: 'prismer_base' # 'prismer_large' for Prismer(Z)-Large
46
+ freeze: 'freeze_vision'
47
+
48
+ prefix: 'A picture of'
prismer/configs/classification.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_path: '/workspace_dataset/dataset_zero'
2
+ label_path: '/workspace_dataset/dataset_experts'
3
+
4
+ experts: ['depth', 'normal', 'seg_coco', 'edge', 'obj_detection', 'ocr_detection'] # 'none' for PrismerZ
5
+
6
+ freeze: 'freeze_vision'
7
+ dataset: 'imagenet'
8
+ shots: 1
9
+
10
+ image_resolution: 384
11
+ prismer_model: 'prismer_base' # 'prismer_large' for Prismer(Z)-Large
12
+
13
+ batch_size_train: 2 # for 4 * 8 nodes [effective batch-size: 64]
14
+ batch_size_test: 8
15
+ init_lr: 5e-5
16
+ weight_decay: 0.05
17
+ min_lr: 0
18
+ max_epoch: 20
19
+
20
+ k_test: 32
21
+ prefix: 'A photo of a'
prismer/configs/experts.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ data_path: 'helpers'
2
+ save_path: 'helpers/labels'
prismer/configs/pretrain.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets: ['cc12m', 'cc3m_sgu', 'coco', 'vg']
2
+
3
+ cc12m_data_path: '/workspace_dataset/cc12m'
4
+ cc3m_data_path: '/home/datasets/cc3m'
5
+ coco_data_path: '/workspace_dataset/dataset_vqa'
6
+ vg_data_path: '/home/datasets/vqa'
7
+ label_path: '/workspace_dataset/dataset_experts'
8
+
9
+ experts: ['depth', 'normal', 'seg_coco', 'edge', 'obj_detection', 'ocr_detection'] # 'none' for PrismerZ
10
+
11
+ image_resolution: 224
12
+ prismer_model: 'prismer_base' # 'prismer_large' for Prismer(Z)-Large
13
+ freeze: 'freeze_lang_vision'
14
+ batch_size_train: 32 # for 4 * 8 nodes [effective batch-size: 1024]
15
+
16
+ max_epoch: 20
17
+ weight_decay: 0.05
18
+ init_lr: 3e-4 # 1e-4 for prismer_large
19
+ min_lr: 1e-6
20
+ warmup_lr: 1e-6
21
+ warmup_steps: 2000
prismer/configs/prismer.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "prismer_base": {
3
+ "roberta_model": {
4
+ "attention_probs_dropout_prob": 0.1,
5
+ "bos_token_id": 0,
6
+ "eos_token_id": 2,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "vision_hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-05,
14
+ "max_position_embeddings": 514,
15
+ "model_name": "roberta-base",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 1,
19
+ "type_vocab_size": 1,
20
+ "vocab_size": 50265,
21
+ "num_decoder_layers": 4,
22
+ "is_decoder": true
23
+ },
24
+ "vit_model": "ViT-B/16"
25
+ },
26
+ "prismer_large": {
27
+ "roberta_model": {
28
+ "attention_probs_dropout_prob": 0.1,
29
+ "bos_token_id": 0,
30
+ "eos_token_id": 2,
31
+ "hidden_act": "gelu",
32
+ "hidden_dropout_prob": 0.1,
33
+ "hidden_size": 1024,
34
+ "vision_hidden_size": 1024,
35
+ "initializer_range": 0.02,
36
+ "intermediate_size": 4096,
37
+ "layer_norm_eps": 1e-05,
38
+ "max_position_embeddings": 514,
39
+ "model_name": "roberta-large",
40
+ "num_attention_heads": 16,
41
+ "num_hidden_layers": 24,
42
+ "pad_token_id": 1,
43
+ "type_vocab_size": 1,
44
+ "vocab_size": 50265,
45
+ "num_decoder_layers": 4,
46
+ "is_decoder": true
47
+ },
48
+ "vit_model": "ViT-L/14@336px"
49
+ },
50
+ "prismer_huge": {
51
+ "roberta_model": {
52
+ "attention_probs_dropout_prob": 0.1,
53
+ "bos_token_id": 0,
54
+ "eos_token_id": 2,
55
+ "hidden_act": "gelu",
56
+ "hidden_dropout_prob": 0.1,
57
+ "hidden_size": 1024,
58
+ "vision_hidden_size": 1280,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "max_position_embeddings": 514,
63
+ "model_name": "roberta-large",
64
+ "num_attention_heads": 16,
65
+ "num_hidden_layers": 24,
66
+ "pad_token_id": 1,
67
+ "type_vocab_size": 1,
68
+ "vocab_size": 50265,
69
+ "num_decoder_layers": 4,
70
+ "is_decoder": true
71
+ },
72
+ "vit_model": "ViT-H/14"
73
+ }
74
+ }
prismer/configs/roberta.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "roberta-base": {
3
+ "attention_probs_dropout_prob": 0.1,
4
+ "bos_token_id": 0,
5
+ "eos_token_id": 2,
6
+ "hidden_act": "gelu",
7
+ "hidden_dropout_prob": 0.1,
8
+ "hidden_size": 768,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 3072,
11
+ "layer_norm_eps": 1e-05,
12
+ "max_position_embeddings": 514,
13
+ "model_type": "roberta",
14
+ "num_attention_heads": 12,
15
+ "num_hidden_layers": 12,
16
+ "pad_token_id": 1,
17
+ "type_vocab_size": 1,
18
+ "vocab_size": 50265,
19
+ "num_decoder_layers": 4,
20
+ "is_decoder": true
21
+ },
22
+ "roberta-large": {
23
+ "attention_probs_dropout_prob": 0.1,
24
+ "bos_token_id": 0,
25
+ "eos_token_id": 2,
26
+ "hidden_act": "gelu",
27
+ "hidden_dropout_prob": 0.1,
28
+ "hidden_size": 1024,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 4096,
31
+ "layer_norm_eps": 1e-05,
32
+ "max_position_embeddings": 514,
33
+ "model_type": "roberta",
34
+ "num_attention_heads": 16,
35
+ "num_hidden_layers": 24,
36
+ "pad_token_id": 1,
37
+ "type_vocab_size": 1,
38
+ "vocab_size": 50265,
39
+ "num_decoder_layers": 4,
40
+ "is_decoder": true
41
+ }
42
+ }
prismer/configs/vqa.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets: ['vqav2', 'vg']
2
+ data_path: '/workspace_dataset/dataset_vqa'
3
+ label_path: '/workspace_dataset/dataset_experts'
4
+ experts: ['depth', 'normal', 'seg_coco', 'edge', 'obj_detection', 'ocr_detection'] # 'none' for PrismerZ
5
+
6
+ image_resolution: 480
7
+ prismer_model: 'prismer_base' # 'prismer_large' for Prismer(Z)-Large
8
+ freeze: 'freeze_vision'
9
+
10
+ batch_size_train: 8 # for 8 * 8 nodes [effective batch-size: 512]
11
+ batch_size_test: 32
12
+ init_lr: 5e-5
13
+ weight_decay: 0.05
14
+ min_lr: 0
15
+ max_epoch: 10
16
+
17
+ k_test: 16
18
+ inference: 'rank'