name: COVER
num_epochs: 0
l_num_epochs: 10
warmup_epochs: 2.5
ema: true
save_model: true
batch_size: 8
num_workers: 6
split_seed: 42

wandb:
    project_name: COVER

data:
    val-livevqc:
        type: ViewDecompositionDataset
        args:
            weight: 0.598
            phase: test
            anno_file: ./examplar_data_labels/LIVE_VQC/labels.txt
            data_prefix: ./datasets/LIVE_VQC/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 1
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 2
                    clip_len: 2
                    t_frag: 1
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 2
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
    val-kv1k:
        type: ViewDecompositionDataset
        args:
            weight: 0.540
            phase: test
            anno_file: ./examplar_data_labels/KoNViD/labels.txt
            data_prefix: ./datasets/KoNViD/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 1
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 2
                    clip_len: 2
                    t_frag: 1
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 2
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
    val-ltest:
        type: ViewDecompositionDataset
        args:
            weight: 0.603
            phase: test
            anno_file: ./examplar_data_labels/LSVQ/labels_test.txt
            data_prefix: ./datasets/LSVQ/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 1
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 2
                    clip_len: 2
                    t_frag: 1
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 2
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
    val-l1080p:
        type: ViewDecompositionDataset
        args:
            weight: 0.620
            phase: test
            anno_file: ./examplar_data_labels/LSVQ/labels_1080p.txt
            data_prefix: ./datasets/LSVQ/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 1
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 2
                    clip_len: 2
                    t_frag: 1
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 2
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
    val-cvd2014:
        type: ViewDecompositionDataset
        args:
            weight: 0.576
            phase: test
            anno_file: ./examplar_data_labels/CVD2014/labels.txt
            data_prefix: ./datasets/CVD2014/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 1
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 2
                    clip_len: 2
                    t_frag: 1
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 2
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
    val-ytugc:
        type: ViewDecompositionDataset
        args:
            weight: 0.443
            phase: test
            anno_file: ./examplar_data_labels/YouTubeUGC/labels.txt
            data_prefix: ./datasets/YouTubeUGC/ # revert before submit
            sample_types:
                semantic:
                    size_h: 512
                    size_w: 512
                    clip_len: 1
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1
                technical:
                    fragments_h: 7
                    fragments_w: 7
                    fsize_h: 32
                    fsize_w: 32
                    aligned: 2
                    clip_len: 2
                    t_frag: 1
                    frame_interval: 2
                    num_clips: 1
                aesthetic:
                    size_h: 224
                    size_w: 224
                    clip_len: 2
                    frame_interval: 2
                    t_frag: 1
                    num_clips: 1

model:
    type: COVER
    args:
        backbone:
            technical:
                type: swin_tiny_grpb
                checkpoint: true
                pretrained:
            aesthetic:
                type: conv_tiny
            semantic:
                type: clip_iqa+
        backbone_preserve_keys: technical,aesthetic,semantic
        divide_head: true
        vqa_head:
            in_channels: 768
            hidden_channels: 64

optimizer:
    lr: !!float 1e-3
    backbone_lr_mult: !!float 1e-1
    wd: 0.05

test_load_path: ./COVER.pth # revert before submit
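
# ---------------------------------------------------------------------------
# Usage sketch (assumption, not part of the original config): a file like
# this is typically parsed with PyYAML and handed to COVER's training or
# evaluation entry point. The snippet below is a minimal, hypothetical
# illustration; the filename "cover.yml" and the reading of num_epochs are
# assumptions, not confirmed by this file.
#
#     import yaml
#
#     with open("cover.yml") as f:        # hypothetical filename
#         opt = yaml.safe_load(f)
#
#     # num_epochs: 0 with l_num_epochs: 10 presumably skips end-to-end
#     # fine-tuning and runs only the head/linear stage, so this config is
#     # effectively evaluation-oriented; test_load_path names the checkpoint
#     # restored before inference.
#     print(opt["model"]["type"])      # -> COVER
#     print(opt["optimizer"]["lr"])    # -> 0.001 (parsed from !!float 1e-3)
#     for name, d in opt["data"].items():   # six validation benchmarks
#         print(name, d["args"]["anno_file"])
# ---------------------------------------------------------------------------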