# COVER / cover.yml
name: COVER
num_epochs: 0
l_num_epochs: 10
warmup_epochs: 2.5
ema: true
save_model: true
batch_size: 8
num_workers: 6
split_seed: 42
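# With num_epochs set to 0, no end-to-end training is run, so this file is
# set up for evaluation; the remaining schedule keys (l_num_epochs, presumably
# a head-only/linear training epoch count, plus warmup_epochs and ema) are
# kept from the training setup.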
wandb:
    project_name: COVER
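# A minimal loading sketch (assuming PyYAML; the evaluation entry point
# itself is not part of this file):
#
#     import yaml
#
#     with open("cover.yml") as f:
#         opt = yaml.safe_load(f)  # resolves the !!float tags below to floats
#
#     for name, cfg in opt["data"].items():
#         print(name, cfg["args"]["anno_file"])
#
# The explicit !!float tags under optimizer: are needed because PyYAML
# resolves a bare 1e-3 (no decimal point) as a string, not a float.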
data:
    val-livevqc:
        type: ViewDecompositionDataset
        args:
            weight: 0.598
            phase: test
            anno_file: ./examplar_data_labels/LIVE_VQC/labels.txt
            data_prefix: ./datasets/LIVE_VQC/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 20
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 40
                    clip_len: 40
                    t_frag: 20
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 40
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
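    # Every benchmark here is decomposed into the same three views, one per
    # COVER branch:
    #   - semantic: frames resized to 512x512; clip_len 20 at frame_interval 2,
    #     with t_frag 20, i.e. one frame per temporal fragment.
    #   - technical: a 7x7 grid of 32x32 spatial fragments (7 * 32 = 224, so a
    #     224x224 input); clip_len 40 split into 20 temporal fragments of
    #     40 / 20 = 2 frames each.
    #   - aesthetic: frames resized to 224x224 over the same 40-frame window.
    # The sample_types blocks repeat verbatim for every dataset that follows.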
    val-kv1k:
        type: ViewDecompositionDataset
        args:
            weight: 0.540
            phase: test
            anno_file: ./examplar_data_labels/KoNViD/labels.txt
            data_prefix: ./datasets/KoNViD/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 20
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 40
                    clip_len: 40
                    t_frag: 20
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 40
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
    val-ltest:
        type: ViewDecompositionDataset
        args:
            weight: 0.603
            phase: test
            anno_file: ./examplar_data_labels/LSVQ/labels_test.txt
            data_prefix: ./datasets/LSVQ/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 20
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 40
                    clip_len: 40
                    t_frag: 20
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 40
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
    val-l1080p:
        type: ViewDecompositionDataset
        args:
            weight: 0.620
            phase: test
            anno_file: ./examplar_data_labels/LSVQ/labels_1080p.txt
            data_prefix: ./datasets/LSVQ/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 20
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 40
                    clip_len: 40
                    t_frag: 20
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 40
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
    val-cvd2014:
        type: ViewDecompositionDataset
        args:
            weight: 0.576
            phase: test
            anno_file: ./examplar_data_labels/CVD2014/labels.txt
            data_prefix: ./datasets/CVD2014/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 20
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 40
                    clip_len: 40
                    t_frag: 20
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 40
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
    val-ytugc:
        type: ViewDecompositionDataset
        args:
            weight: 0.443
            phase: test
            anno_file: ./examplar_data_labels/YouTubeUGC/labels.txt
            data_prefix: ./datasets/YouTubeUGC/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 20
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 40
                    clip_len: 40
                    t_frag: 20
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 40
                    frame_interval: 2
                    t_frag: 20
                    num_clips: 1
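# The per-dataset weight values above (0.598, 0.540, ...) do not affect
# sampling; their exact use is decided by the evaluation script, not this
# file. One plausible reading, sketched with illustrative names and `opt`
# loaded as in the earlier sketch, is a weighted aggregate of per-benchmark
# correlations:
#
#     weights = {n: c["args"]["weight"] for n, c in opt["data"].items()}
#     # srcc: {dataset_name: measured SRCC}, produced by the eval loop
#     overall = sum(weights[n] * srcc[n] for n in weights) / sum(weights.values())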
model:
    type: COVER
    args:
        backbone:
            technical:
                type: swin_tiny_grpb
                checkpoint: true
                pretrained:
            aesthetic:
                type: conv_tiny
            semantic:
                type: clip_iqa+
        backbone_preserve_keys: technical,aesthetic,semantic
        divide_head: true
        vqa_head:
            in_channels: 768
            hidden_channels: 64
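# The three backbones mirror the three views above: swin_tiny_grpb is a
# Swin-Tiny variant with gated relative position biases (GRPB, from
# FAST-VQA) for the technical branch, conv_tiny a ConvNeXt-Tiny for the
# aesthetic branch, and clip_iqa+ a CLIP-based encoder for the semantic
# branch. With divide_head: true each branch presumably gets its own
# 768-channel vqa_head rather than a shared one.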
optimizer:
    lr: !!float 1e-3
    backbone_lr_mult: !!float 1e-1
    wd: 0.05
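# backbone_lr_mult scales the backbone learning rate relative to the heads:
# 1e-3 * 1e-1 = 1e-4. A minimal PyTorch sketch of that split, assuming a
# constructed `model` (the AdamW choice and the "backbone" name filter are
# assumptions, not read from this file):
#
#     import torch
#
#     backbone, heads = [], []
#     for name, p in model.named_parameters():
#         (backbone if "backbone" in name else heads).append(p)
#
#     optim = torch.optim.AdamW(
#         [{"params": heads, "lr": 1e-3},
#          {"params": backbone, "lr": 1e-3 * 1e-1}],
#         weight_decay=0.05,
#     )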
test_load_path: ./COVER.pth # revert before submit