cooperll committed
Commit 2514fb4
Parent: 34f2497

LambdaSuperRes initial commit

Files changed (50). This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
  1. .gitignore +10 -0
  2. KAIR/LICENSE +9 -0
  3. KAIR/README.md +343 -0
  4. KAIR/data/__init__.py +1 -0
  5. KAIR/data/dataset_blindsr.py +92 -0
  6. KAIR/data/dataset_dncnn.py +101 -0
  7. KAIR/data/dataset_dnpatch.py +133 -0
  8. KAIR/data/dataset_dpsr.py +131 -0
  9. KAIR/data/dataset_fdncnn.py +109 -0
  10. KAIR/data/dataset_ffdnet.py +103 -0
  11. KAIR/data/dataset_jpeg.py +118 -0
  12. KAIR/data/dataset_l.py +43 -0
  13. KAIR/data/dataset_plain.py +85 -0
  14. KAIR/data/dataset_plainpatch.py +131 -0
  15. KAIR/data/dataset_sr.py +197 -0
  16. KAIR/data/dataset_srmd.py +155 -0
  17. KAIR/data/dataset_usrnet.py +126 -0
  18. KAIR/data/dataset_video_test.py +382 -0
  19. KAIR/data/dataset_video_train.py +390 -0
  20. KAIR/data/degradations.py +145 -0
  21. KAIR/data/select_dataset.py +86 -0
  22. KAIR/docs/README_SwinIR.md +194 -0
  23. KAIR/docs/README_VRT.md +191 -0
  24. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_095438.json +201 -0
  25. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_095450.json +201 -0
  26. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_095518.json +201 -0
  27. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_101636.json +201 -0
  28. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_101949.json +201 -0
  29. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_102114.json +201 -0
  30. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_102214.json +201 -0
  31. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_104612.json +201 -0
  32. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_105219.json +201 -0
  33. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_105304.json +201 -0
  34. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_105340.json +201 -0
  35. KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/train.log +0 -0
  36. KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/options/003_train_vrt_videosr_bi_vimeo_7frames_220311_095626.json +198 -0
  37. KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/options/003_train_vrt_videosr_bi_vimeo_7frames_220311_101027.json +198 -0
  38. KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/options/003_train_vrt_videosr_bi_vimeo_7frames_220311_101042.json +198 -0
  39. KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/options/003_train_vrt_videosr_bi_vimeo_7frames_220311_101058.json +198 -0
  40. KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/train.log +0 -0
  41. KAIR/image_degradation.py +106 -0
  42. KAIR/kernels/Levin09.mat +0 -0
  43. KAIR/kernels/k_large_1.png +0 -0
  44. KAIR/kernels/k_large_2.png +0 -0
  45. KAIR/kernels/kernels_12.mat +0 -0
  46. KAIR/kernels/kernels_bicubicx234.mat +0 -0
  47. KAIR/kernels/srmd_pca_matlab.mat +0 -0
  48. KAIR/main_challenge_sr.py +174 -0
  49. KAIR/main_download_pretrained_models.py +141 -0
  50. KAIR/main_test_dncnn.py +203 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
+ *.swp
+ *.swo
+
+ __pycache__
+ *.pyc
+
+ sr_interactive_tmp
+ sr_interactive_tmp_output
+
+ gradio_cached_examples
KAIR/LICENSE ADDED
@@ -0,0 +1,9 @@
+ MIT License
+
+ Copyright (c) 2019 Kai Zhang
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
KAIR/README.md ADDED
@@ -0,0 +1,343 @@
+ ## Training and testing codes for USRNet, DnCNN, FFDNet, SRMD, DPSR, MSRResNet, ESRGAN, BSRGAN, SwinIR, VRT
+ [![download](https://img.shields.io/github/downloads/cszn/KAIR/total.svg)](https://github.com/cszn/KAIR/releases) ![visitors](https://visitor-badge.glitch.me/badge?page_id=cszn/KAIR)
+
+ [Kai Zhang](https://cszn.github.io/)
+
+ *[Computer Vision Lab](https://vision.ee.ethz.ch/the-institute.html), ETH Zurich, Switzerland*
+
+ _______
+ - **_News (2022-02-15)_**: We release [the training codes](https://github.com/cszn/KAIR/blob/master/docs/README_VRT.md) of [VRT ![GitHub Stars](https://img.shields.io/github/stars/JingyunLiang/VRT?style=social)](https://github.com/JingyunLiang/VRT) for video SR, deblurring and denoising.
+ <p align="center">
+ <a href="https://github.com/JingyunLiang/VRT">
+ <img width=30% src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/teaser_vsr.gif"/>
+ <img width=30% src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/teaser_vdb.gif"/>
+ <img width=30% src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/teaser_vdn.gif"/>
+ </a>
+ </p>
+
+ - **_News (2021-12-23)_**: Our techniques are adopted in [https://www.amemori.ai/](https://www.amemori.ai/).
+ - **_News (2021-12-23)_**: Our new work for practical image denoising.
+
+ - <img src="figs/palace.png" height="320px"/> <img src="figs/palace_HSCU.png" height="320px"/>
+ - [<img src="https://github.com/cszn/KAIR/raw/master/figs/denoising_02.png" height="256px"/>](https://imgsli.com/ODczMTc)
+ [<img src="https://github.com/cszn/KAIR/raw/master/figs/denoising_01.png" height="256px"/>](https://imgsli.com/ODczMTY)
+ - **_News (2021-09-09)_**: Add [main_download_pretrained_models.py](https://github.com/cszn/KAIR/blob/master/main_download_pretrained_models.py) to download pre-trained models.
+ - **_News (2021-09-08)_**: Add [matlab code](https://github.com/cszn/KAIR/tree/master/matlab) to zoom in on a local part of an image for comparison between different results.
+ - **_News (2021-09-07)_**: We upload [the training code](https://github.com/cszn/KAIR/blob/master/docs/README_SwinIR.md) of [SwinIR ![GitHub Stars](https://img.shields.io/github/stars/JingyunLiang/SwinIR?style=social)](https://github.com/JingyunLiang/SwinIR) and provide an [interactive online Colab demo for real-world image SR](https://colab.research.google.com/gist/JingyunLiang/a5e3e54bc9ef8d7bf594f6fee8208533/swinir-demo-on-real-world-image-sr.ipynb). Try to super-resolve your own images on Colab! <a href="https://colab.research.google.com/gist/JingyunLiang/a5e3e54bc9ef8d7bf594f6fee8208533/swinir-demo-on-real-world-image-sr.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="google colab logo"></a>
+
+ |Real-World Image (x4)|[BSRGAN, ICCV2021](https://github.com/cszn/BSRGAN)|[Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN)|SwinIR (ours)|
+ | :--- | :---: | :-----: | :-----: |
+ |<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/ETH_LR.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/ETH_BSRGAN.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/ETH_realESRGAN.jpg">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/ETH_SwinIR.png">|
+ |<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/OST_009_crop_LR.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/OST_009_crop_BSRGAN.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/OST_009_crop_realESRGAN.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/OST_009_crop_SwinIR.png">|
+
+ - **_News (2021-08-31)_**: We upload the [training code of BSRGAN](https://github.com/cszn/BSRGAN#training).
+ - **_News (2021-08-24)_**: We upload the BSRGAN degradation model.
+ - **_News (2021-08-22)_**: Support multi-feature-layer VGG perceptual loss and UNet discriminator.
+ - **_News (2021-08-18)_**: We upload the extended BSRGAN degradation model. It is slightly different from our published version.
+
+ - **_News (2021-06-03)_**: Add testing codes of [GPEN (CVPR21)](https://github.com/yangxy/GPEN) for face image enhancement: [main_test_face_enhancement.py](https://github.com/cszn/KAIR/blob/master/main_test_face_enhancement.py).
+
+ <img src="figs/face_04_comparison.png" width="730px"/>
+ <img src="figs/face_13_comparison.png" width="730px"/>
+ <img src="figs/face_08_comparison.png" width="730px"/>
+ <img src="figs/face_01_comparison.png" width="730px"/>
+ <img src="figs/face_12_comparison.png" width="730px"/>
+ <img src="figs/face_10_comparison.png" width="730px"/>
+
+
+ - **_News (2021-05-13)_**: Add [PatchGAN discriminator](https://github.com/cszn/KAIR/blob/master/models/network_discriminator.py).
+
+ - **_News (2021-05-12)_**: Support distributed training, see also [https://github.com/xinntao/BasicSR/blob/master/docs/TrainTest.md](https://github.com/xinntao/BasicSR/blob/master/docs/TrainTest.md).
+
+ - **_News (2021-01)_**: [BSRGAN](https://github.com/cszn/BSRGAN) for blind real image super-resolution will be added.
+
+ - **_Pull requests are welcome!_**
+
+ - **Correction (2020-10)**: If you use multiple GPUs for GAN training, remove or comment out [Line 105](https://github.com/cszn/KAIR/blob/e52a6944c6a40ba81b88430ffe38fd6517e0449e/models/model_gan.py#L105) to enable `DataParallel` for fast training.
+
+ - **News (2020-10)**: Add [utils_receptivefield.py](https://github.com/cszn/KAIR/blob/master/utils/utils_receptivefield.py) to calculate the receptive field.
+
+ - **News (2020-8)**: A `deep plug-and-play image restoration toolbox` is released at [cszn/DPIR](https://github.com/cszn/DPIR).
+
+ - **Tips (2020-8)**: Use [this](https://github.com/cszn/KAIR/blob/9fd17abff001ab82a22070f7e442bb5246d2d844/main_challenge_sr.py#L147) to avoid the `out of memory` issue.
+
+ - **News (2020-7)**: Add [main_challenge_sr.py](https://github.com/cszn/KAIR/blob/23b0d0f717980e48fad02513ba14045d57264fe1/main_challenge_sr.py#L90) to get `FLOPs`, `#Params`, `Runtime`, `#Activations`, `#Conv`, and `Max Memory Allocated`.
+ ```python
+ from utils.utils_modelsummary import get_model_activation, get_model_flops
+ input_dim = (3, 256, 256)  # set the input dimension
+ activations, num_conv2d = get_model_activation(model, input_dim)
+ logger.info('{:>16s} : {:<.4f} [M]'.format('#Activations', activations/10**6))
+ logger.info('{:>16s} : {:<d}'.format('#Conv2d', num_conv2d))
+ flops = get_model_flops(model, input_dim, False)
+ logger.info('{:>16s} : {:<.4f} [G]'.format('FLOPs', flops/10**9))
+ num_parameters = sum(map(lambda x: x.numel(), model.parameters()))
+ logger.info('{:>16s} : {:<.4f} [M]'.format('#Params', num_parameters/10**6))
+ ```
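In the snippet above, `model` is assumed to be an already-instantiated network, moved to the device you want to profile, and `logger` a configured `logging` logger; both are set up by the surrounding script (here, main_challenge_sr.py) rather than by the snippet itself.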
+
+ - **News (2020-6)**: Add [USRNet (CVPR 2020)](https://github.com/cszn/USRNet) for training and testing.
+ - [Network Architecture](https://github.com/cszn/KAIR/blob/3357aa0e54b81b1e26ceb1cee990f39add235e17/models/network_usrnet.py#L309)
+ - [Dataset](https://github.com/cszn/KAIR/blob/6c852636d3715bb281637863822a42c72739122a/data/dataset_usrnet.py#L16)
+
+
+ Clone repo
+ ----------
+ ```
+ git clone https://github.com/cszn/KAIR.git
+ ```
+ ```
+ pip install -r requirement.txt
+ ```
+
+
+
+ Training
+ ----------
+
+ You should first modify the json file in [options](https://github.com/cszn/KAIR/tree/master/options), for example,
+ setting ["gpu_ids": [0,1,2,3]](https://github.com/cszn/KAIR/blob/ff80d265f64de67dfb3ffa9beff8949773c81a3d/options/train_msrresnet_psnr.json#L4) if 4 GPUs are used, and
+ setting ["dataroot_H": "trainsets/trainH"](https://github.com/cszn/KAIR/blob/ff80d265f64de67dfb3ffa9beff8949773c81a3d/options/train_msrresnet_psnr.json#L24) if the path of the high-quality dataset is `trainsets/trainH`.
+
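As a rough sketch of the two fields mentioned above, written out as a Python dict (the nesting follows train_msrresnet_psnr.json; treat this as illustrative rather than a complete options file):

```python
# Illustrative fragment of an options JSON, shown as a Python dict.
# Only the two fields discussed above are included; a real options file
# contains many more entries.
opt_fragment = {
    "gpu_ids": [0, 1, 2, 3],                  # train on 4 GPUs
    "datasets": {
        "train": {
            "dataroot_H": "trainsets/trainH"  # path to the high-quality training images
        }
    }
}
```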
+ - Training with `DataParallel` - PSNR
+
+
+ ```bash
+ python main_train_psnr.py --opt options/train_msrresnet_psnr.json
+ ```
+
+ - Training with `DataParallel` - GAN
+
+ ```bash
+ python main_train_gan.py --opt options/train_msrresnet_gan.json
+ ```
+
+ - Training with `DistributedDataParallel` - PSNR - 4 GPUs
+
+ ```bash
+ python -m torch.distributed.launch --nproc_per_node=4 --master_port=1234 main_train_psnr.py --opt options/train_msrresnet_psnr.json --dist True
+ ```
+
+ - Training with `DistributedDataParallel` - PSNR - 8 GPUs
+
+ ```bash
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_psnr.py --opt options/train_msrresnet_psnr.json --dist True
+ ```
+
+ - Training with `DistributedDataParallel` - GAN - 4 GPUs
+
+ ```bash
+ python -m torch.distributed.launch --nproc_per_node=4 --master_port=1234 main_train_gan.py --opt options/train_msrresnet_gan.json --dist True
+ ```
+
+ - Training with `DistributedDataParallel` - GAN - 8 GPUs
+
+ ```bash
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_gan.py --opt options/train_msrresnet_gan.json --dist True
+ ```
+
+ - Kill distributed training processes of `main_train_gan.py`
+
+ ```bash
+ kill $(ps aux | grep main_train_gan.py | grep -v grep | awk '{print $2}')
+ ```
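A practical note on the `DistributedDataParallel` commands: `torch.distributed.launch` starts one Python process per GPU, which is why the kill command above matches processes by script name. Recent PyTorch releases deprecate this launcher in favor of `torchrun`; the commands shown target the launcher interface the KAIR training scripts were written against, so check your PyTorch version before swapping it out.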
+
+ ----------
+ | Method | Original Link |
+ |---|---|
+ | DnCNN |[https://github.com/cszn/DnCNN](https://github.com/cszn/DnCNN)|
+ | FDnCNN |[https://github.com/cszn/DnCNN](https://github.com/cszn/DnCNN)|
+ | FFDNet | [https://github.com/cszn/FFDNet](https://github.com/cszn/FFDNet)|
+ | SRMD | [https://github.com/cszn/SRMD](https://github.com/cszn/SRMD)|
+ | DPSR-SRResNet | [https://github.com/cszn/DPSR](https://github.com/cszn/DPSR)|
+ | SRResNet | [https://github.com/xinntao/BasicSR](https://github.com/xinntao/BasicSR)|
+ | ESRGAN | [https://github.com/xinntao/ESRGAN](https://github.com/xinntao/ESRGAN)|
+ | RRDB | [https://github.com/xinntao/ESRGAN](https://github.com/xinntao/ESRGAN)|
+ | IMDN | [https://github.com/Zheng222/IMDN](https://github.com/Zheng222/IMDN)|
+ | USRNet | [https://github.com/cszn/USRNet](https://github.com/cszn/USRNet)|
+ | DRUNet | [https://github.com/cszn/DPIR](https://github.com/cszn/DPIR)|
+ | DPIR | [https://github.com/cszn/DPIR](https://github.com/cszn/DPIR)|
+ | BSRGAN | [https://github.com/cszn/BSRGAN](https://github.com/cszn/BSRGAN)|
+ | SwinIR | [https://github.com/JingyunLiang/SwinIR](https://github.com/JingyunLiang/SwinIR)|
+ | VRT | [https://github.com/JingyunLiang/VRT](https://github.com/JingyunLiang/VRT) |
+
+ Network architectures
+ ----------
+ * [USRNet](https://github.com/cszn/USRNet)
+
+ <img src="https://github.com/cszn/USRNet/blob/master/figs/architecture.png" width="600px"/>
+
+ * DnCNN
+
+ <img src="https://github.com/cszn/DnCNN/blob/master/figs/dncnn.png" width="600px"/>
+
+ * IRCNN denoiser
+
+ <img src="https://github.com/lipengFu/IRCNN/raw/master/Image/image_2.png" width="680px"/>
+
+ * FFDNet
+
+ <img src="https://github.com/cszn/FFDNet/blob/master/figs/ffdnet.png" width="600px"/>
+
+ * SRMD
+
+ <img src="https://github.com/cszn/SRMD/blob/master/figs/architecture.png" width="605px"/>
+
+ * SRResNet, SRGAN, RRDB, ESRGAN
+
+ <img src="https://github.com/xinntao/ESRGAN/blob/master/figures/architecture.jpg" width="595px"/>
+
+ * IMDN
+
+ <img src="figs/imdn.png" width="460px"/> ----- <img src="figs/imdn_block.png" width="100px"/>
+
+
+
+ Testing
+ ----------
+ |Method | [model_zoo](model_zoo)|
+ |---|---|
+ | [main_test_dncnn.py](main_test_dncnn.py) |```dncnn_15.pth, dncnn_25.pth, dncnn_50.pth, dncnn_gray_blind.pth, dncnn_color_blind.pth, dncnn3.pth```|
+ | [main_test_ircnn_denoiser.py](main_test_ircnn_denoiser.py) | ```ircnn_gray.pth, ircnn_color.pth```|
+ | [main_test_fdncnn.py](main_test_fdncnn.py) | ```fdncnn_gray.pth, fdncnn_color.pth, fdncnn_gray_clip.pth, fdncnn_color_clip.pth```|
+ | [main_test_ffdnet.py](main_test_ffdnet.py) | ```ffdnet_gray.pth, ffdnet_color.pth, ffdnet_gray_clip.pth, ffdnet_color_clip.pth```|
+ | [main_test_srmd.py](main_test_srmd.py) | ```srmdnf_x2.pth, srmdnf_x3.pth, srmdnf_x4.pth, srmd_x2.pth, srmd_x3.pth, srmd_x4.pth```|
+ | | **The above models are converted from MatConvNet.** |
+ | [main_test_dpsr.py](main_test_dpsr.py) | ```dpsr_x2.pth, dpsr_x3.pth, dpsr_x4.pth, dpsr_x4_gan.pth```|
+ | [main_test_msrresnet.py](main_test_msrresnet.py) | ```msrresnet_x4_psnr.pth, msrresnet_x4_gan.pth```|
+ | [main_test_rrdb.py](main_test_rrdb.py) | ```rrdb_x4_psnr.pth, rrdb_x4_esrgan.pth```|
+ | [main_test_imdn.py](main_test_imdn.py) | ```imdn_x4.pth```|
+
+ [model_zoo](model_zoo)
+ --------
+ - download link: [https://drive.google.com/drive/folders/13kfr3qny7S2xwG9h7v95F5mkWs0OmU0D](https://drive.google.com/drive/folders/13kfr3qny7S2xwG9h7v95F5mkWs0OmU0D)
+
+ [trainsets](trainsets)
+ ----------
+ - [https://github.com/xinntao/BasicSR/blob/master/docs/DatasetPreparation.md](https://github.com/xinntao/BasicSR/blob/master/docs/DatasetPreparation.md)
+ - [train400](https://github.com/cszn/DnCNN/tree/master/TrainingCodes/DnCNN_TrainingCodes_v1.0/data)
+ - [DIV2K](https://data.vision.ee.ethz.ch/cvl/DIV2K/)
+ - [Flickr2K](https://cv.snu.ac.kr/research/EDSR/Flickr2K.tar)
+ - optional: use [split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_size=512, p_overlap=96, p_max=800)](https://github.com/cszn/KAIR/blob/3ee0bf3e07b90ec0b7302d97ee2adb780617e637/utils/utils_image.py#L123) to get ```trainsets/trainH``` with small images for fast data loading
+
+ [testsets](testsets)
+ -----------
+ - [https://github.com/xinntao/BasicSR/blob/master/docs/DatasetPreparation.md](https://github.com/xinntao/BasicSR/blob/master/docs/DatasetPreparation.md)
+ - [set12](https://github.com/cszn/FFDNet/tree/master/testsets)
+ - [bsd68](https://github.com/cszn/FFDNet/tree/master/testsets)
+ - [cbsd68](https://github.com/cszn/FFDNet/tree/master/testsets)
+ - [kodak24](https://github.com/cszn/FFDNet/tree/master/testsets)
+ - [srbsd68](https://github.com/cszn/DPSR/tree/master/testsets/BSD68/GT)
+ - set5
+ - set14
+ - cbsd100
+ - urban100
+ - manga109
+
+
+ References
+ ----------
+ ```bibtex
+ @article{liang2022vrt,
+ title={VRT: A Video Restoration Transformer},
+ author={Liang, Jingyun and Cao, Jiezhang and Fan, Yuchen and Zhang, Kai and Ranjan, Rakesh and Li, Yawei and Timofte, Radu and Van Gool, Luc},
+ journal={arXiv preprint arXiv:2022.00000},
+ year={2022}
+ }
+ @inproceedings{liang2021swinir,
+ title={SwinIR: Image Restoration Using Swin Transformer},
+ author={Liang, Jingyun and Cao, Jiezhang and Sun, Guolei and Zhang, Kai and Van Gool, Luc and Timofte, Radu},
+ booktitle={IEEE International Conference on Computer Vision Workshops},
+ pages={1833--1844},
+ year={2021}
+ }
+ @inproceedings{zhang2021designing,
+ title={Designing a Practical Degradation Model for Deep Blind Image Super-Resolution},
+ author={Zhang, Kai and Liang, Jingyun and Van Gool, Luc and Timofte, Radu},
+ booktitle={IEEE International Conference on Computer Vision},
+ pages={4791--4800},
+ year={2021}
+ }
+ @article{zhang2021plug, % DPIR & DRUNet & IRCNN
+ title={Plug-and-Play Image Restoration with Deep Denoiser Prior},
+ author={Zhang, Kai and Li, Yawei and Zuo, Wangmeng and Zhang, Lei and Van Gool, Luc and Timofte, Radu},
+ journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ year={2021}
+ }
+ @inproceedings{zhang2020aim, % efficientSR_challenge
+ title={AIM 2020 Challenge on Efficient Super-Resolution: Methods and Results},
+ author={Kai Zhang and Martin Danelljan and Yawei Li and Radu Timofte and others},
+ booktitle={European Conference on Computer Vision Workshops},
+ year={2020}
+ }
+ @inproceedings{zhang2020deep, % USRNet
+ title={Deep unfolding network for image super-resolution},
+ author={Zhang, Kai and Van Gool, Luc and Timofte, Radu},
+ booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={3217--3226},
+ year={2020}
+ }
+ @article{zhang2017beyond, % DnCNN
+ title={Beyond a gaussian denoiser: Residual learning of deep cnn for image denoising},
+ author={Zhang, Kai and Zuo, Wangmeng and Chen, Yunjin and Meng, Deyu and Zhang, Lei},
+ journal={IEEE Transactions on Image Processing},
+ volume={26},
+ number={7},
+ pages={3142--3155},
+ year={2017}
+ }
+ @inproceedings{zhang2017learning, % IRCNN
+ title={Learning deep CNN denoiser prior for image restoration},
+ author={Zhang, Kai and Zuo, Wangmeng and Gu, Shuhang and Zhang, Lei},
+ booktitle={IEEE conference on computer vision and pattern recognition},
+ pages={3929--3938},
+ year={2017}
+ }
+ @article{zhang2018ffdnet, % FFDNet, FDnCNN
+ title={FFDNet: Toward a fast and flexible solution for CNN-based image denoising},
+ author={Zhang, Kai and Zuo, Wangmeng and Zhang, Lei},
+ journal={IEEE Transactions on Image Processing},
+ volume={27},
+ number={9},
+ pages={4608--4622},
+ year={2018}
+ }
+ @inproceedings{zhang2018learning, % SRMD
+ title={Learning a single convolutional super-resolution network for multiple degradations},
+ author={Zhang, Kai and Zuo, Wangmeng and Zhang, Lei},
+ booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={3262--3271},
+ year={2018}
+ }
+ @inproceedings{zhang2019deep, % DPSR
+ title={Deep Plug-and-Play Super-Resolution for Arbitrary Blur Kernels},
+ author={Zhang, Kai and Zuo, Wangmeng and Zhang, Lei},
+ booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={1671--1681},
+ year={2019}
+ }
+ @InProceedings{wang2018esrgan, % ESRGAN, MSRResNet
+ author = {Wang, Xintao and Yu, Ke and Wu, Shixiang and Gu, Jinjin and Liu, Yihao and Dong, Chao and Qiao, Yu and Loy, Chen Change},
+ title = {ESRGAN: Enhanced super-resolution generative adversarial networks},
+ booktitle = {The European Conference on Computer Vision Workshops (ECCVW)},
+ month = {September},
+ year = {2018}
+ }
+ @inproceedings{hui2019lightweight, % IMDN
+ title={Lightweight Image Super-Resolution with Information Multi-distillation Network},
+ author={Hui, Zheng and Gao, Xinbo and Yang, Yunchu and Wang, Xiumei},
+ booktitle={Proceedings of the 27th ACM International Conference on Multimedia (ACM MM)},
+ pages={2024--2032},
+ year={2019}
+ }
+ @inproceedings{zhang2019aim, % IMDN
+ title={AIM 2019 Challenge on Constrained Super-Resolution: Methods and Results},
+ author={Kai Zhang and Shuhang Gu and Radu Timofte and others},
+ booktitle={IEEE International Conference on Computer Vision Workshops},
+ year={2019}
+ }
+ @inproceedings{yang2021gan,
+ title={GAN Prior Embedded Network for Blind Face Restoration in the Wild},
+ author={Yang, Tao and Ren, Peiran and Xie, Xuansong and Zhang, Lei},
+ booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
+ year={2021}
+ }
+ ```
KAIR/data/__init__.py ADDED
@@ -0,0 +1 @@
+
KAIR/data/dataset_blindsr.py ADDED
@@ -0,0 +1,92 @@
1
+ import random
2
+ import numpy as np
3
+ import torch.utils.data as data
4
+ import utils.utils_image as util
5
+ import os
6
+ from utils import utils_blindsr as blindsr
7
+
8
+
9
+ class DatasetBlindSR(data.Dataset):
10
+ '''
11
+ # -----------------------------------------
12
+ # dataset for BSRGAN
13
+ # -----------------------------------------
14
+ '''
15
+ def __init__(self, opt):
16
+ super(DatasetBlindSR, self).__init__()
17
+ self.opt = opt
18
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
19
+ self.sf = opt['scale'] if opt['scale'] else 4
20
+ self.shuffle_prob = opt['shuffle_prob'] if opt['shuffle_prob'] else 0.1
21
+ self.use_sharp = opt['use_sharp'] if opt['use_sharp'] else False
22
+ self.degradation_type = opt['degradation_type'] if opt['degradation_type'] else 'bsrgan'
23
+ self.lq_patchsize = self.opt['lq_patchsize'] if self.opt['lq_patchsize'] else 64
24
+ self.patch_size = self.opt['H_size'] if self.opt['H_size'] else self.lq_patchsize*self.sf
25
+
26
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
27
+ print(len(self.paths_H))
28
+
29
+ # for n, v in enumerate(self.paths_H):
30
+ # if 'face' in v:
31
+ # del self.paths_H[n]
32
+ # time.sleep(1)
33
+ assert self.paths_H, 'Error: H path is empty.'
34
+
35
+ def __getitem__(self, index):
36
+
37
+ L_path = None
38
+
39
+ # ------------------------------------
40
+ # get H image
41
+ # ------------------------------------
42
+ H_path = self.paths_H[index]
43
+ img_H = util.imread_uint(H_path, self.n_channels)
44
+ img_name, ext = os.path.splitext(os.path.basename(H_path))
45
+ H, W, C = img_H.shape
46
+
47
+ if H < self.patch_size or W < self.patch_size:
48
+ img_H = np.tile(np.random.randint(0, 256, size=[1, 1, self.n_channels], dtype=np.uint8), (self.patch_size, self.patch_size, 1))
49
+
50
+ # ------------------------------------
51
+ # if train, get L/H patch pair
52
+ # ------------------------------------
53
+ if self.opt['phase'] == 'train':
54
+
55
+ H, W, C = img_H.shape
56
+
57
+ rnd_h_H = random.randint(0, max(0, H - self.patch_size))
58
+ rnd_w_H = random.randint(0, max(0, W - self.patch_size))
59
+ img_H = img_H[rnd_h_H:rnd_h_H + self.patch_size, rnd_w_H:rnd_w_H + self.patch_size, :]
60
+
61
+ if 'face' in img_name:
62
+ mode = random.choice([0, 4])
63
+ img_H = util.augment_img(img_H, mode=mode)
64
+ else:
65
+ mode = random.randint(0, 7)
66
+ img_H = util.augment_img(img_H, mode=mode)
67
+
68
+ img_H = util.uint2single(img_H)
69
+ if self.degradation_type == 'bsrgan':
70
+ img_L, img_H = blindsr.degradation_bsrgan(img_H, self.sf, lq_patchsize=self.lq_patchsize, isp_model=None)
71
+ elif self.degradation_type == 'bsrgan_plus':
72
+ img_L, img_H = blindsr.degradation_bsrgan_plus(img_H, self.sf, shuffle_prob=self.shuffle_prob, use_sharp=self.use_sharp, lq_patchsize=self.lq_patchsize)
73
+
74
+ else:
75
+ img_H = util.uint2single(img_H)
76
+ if self.degradation_type == 'bsrgan':
77
+ img_L, img_H = blindsr.degradation_bsrgan(img_H, self.sf, lq_patchsize=self.lq_patchsize, isp_model=None)
78
+ elif self.degradation_type == 'bsrgan_plus':
79
+ img_L, img_H = blindsr.degradation_bsrgan_plus(img_H, self.sf, shuffle_prob=self.shuffle_prob, use_sharp=self.use_sharp, lq_patchsize=self.lq_patchsize)
80
+
81
+ # ------------------------------------
82
+ # L/H pairs, HWC to CHW, numpy to tensor
83
+ # ------------------------------------
84
+ img_H, img_L = util.single2tensor3(img_H), util.single2tensor3(img_L)
85
+
86
+ if L_path is None:
87
+ L_path = H_path
88
+
89
+ return {'L': img_L, 'H': img_H, 'L_path': L_path, 'H_path': H_path}
90
+
91
+ def __len__(self):
92
+ return len(self.paths_H)
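A minimal sketch of how a dataset class like `DatasetBlindSR` is typically driven inside the KAIR repo, assuming an `opt` dict carrying the keys read in `__init__` above (the exact values are illustrative):

```python
# Hypothetical usage sketch; in KAIR these options normally come from a JSON options file.
from torch.utils.data import DataLoader
from data.dataset_blindsr import DatasetBlindSR

opt = {
    'phase': 'train',               # 'train' synthesizes L/H patch pairs on the fly
    'n_channels': 3,
    'scale': 4,                     # super-resolution factor (self.sf)
    'shuffle_prob': 0.1,
    'use_sharp': False,
    'degradation_type': 'bsrgan',   # or 'bsrgan_plus'
    'lq_patchsize': 64,
    'H_size': 256,                  # 64 * 4, the high-quality patch size
    'dataroot_H': 'trainsets/trainH',
}

train_set = DatasetBlindSR(opt)
train_loader = DataLoader(train_set, batch_size=8, shuffle=True, num_workers=4)

for batch in train_loader:
    img_L, img_H = batch['L'], batch['H']   # degraded input and ground truth, CHW tensors
    break
```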
KAIR/data/dataset_dncnn.py ADDED
@@ -0,0 +1,101 @@
1
+ import os.path
2
+ import random
3
+ import numpy as np
4
+ import torch
5
+ import torch.utils.data as data
6
+ import utils.utils_image as util
7
+
8
+
9
+ class DatasetDnCNN(data.Dataset):
10
+ """
11
+ # -----------------------------------------
12
+ # Get L/H for denosing on AWGN with fixed sigma.
13
+ # Only dataroot_H is needed.
14
+ # -----------------------------------------
15
+ # e.g., DnCNN
16
+ # -----------------------------------------
17
+ """
18
+
19
+ def __init__(self, opt):
20
+ super(DatasetDnCNN, self).__init__()
21
+ print('Dataset: Denosing on AWGN with fixed sigma. Only dataroot_H is needed.')
22
+ self.opt = opt
23
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
24
+ self.patch_size = opt['H_size'] if opt['H_size'] else 64
25
+ self.sigma = opt['sigma'] if opt['sigma'] else 25
26
+ self.sigma_test = opt['sigma_test'] if opt['sigma_test'] else self.sigma
27
+
28
+ # ------------------------------------
29
+ # get path of H
30
+ # return None if input is None
31
+ # ------------------------------------
32
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
33
+
34
+ def __getitem__(self, index):
35
+
36
+ # ------------------------------------
37
+ # get H image
38
+ # ------------------------------------
39
+ H_path = self.paths_H[index]
40
+ img_H = util.imread_uint(H_path, self.n_channels)
41
+
42
+ L_path = H_path
43
+
44
+ if self.opt['phase'] == 'train':
45
+ """
46
+ # --------------------------------
47
+ # get L/H patch pairs
48
+ # --------------------------------
49
+ """
50
+ H, W, _ = img_H.shape
51
+
52
+ # --------------------------------
53
+ # randomly crop the patch
54
+ # --------------------------------
55
+ rnd_h = random.randint(0, max(0, H - self.patch_size))
56
+ rnd_w = random.randint(0, max(0, W - self.patch_size))
57
+ patch_H = img_H[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size, :]
58
+
59
+ # --------------------------------
60
+ # augmentation - flip, rotate
61
+ # --------------------------------
62
+ mode = random.randint(0, 7)
63
+ patch_H = util.augment_img(patch_H, mode=mode)
64
+
65
+ # --------------------------------
66
+ # HWC to CHW, numpy(uint) to tensor
67
+ # --------------------------------
68
+ img_H = util.uint2tensor3(patch_H)
69
+ img_L = img_H.clone()
70
+
71
+ # --------------------------------
72
+ # add noise
73
+ # --------------------------------
74
+ noise = torch.randn(img_L.size()).mul_(self.sigma/255.0)
75
+ img_L.add_(noise)
76
+
77
+ else:
78
+ """
79
+ # --------------------------------
80
+ # get L/H image pairs
81
+ # --------------------------------
82
+ """
83
+ img_H = util.uint2single(img_H)
84
+ img_L = np.copy(img_H)
85
+
86
+ # --------------------------------
87
+ # add noise
88
+ # --------------------------------
89
+ np.random.seed(seed=0)
90
+ img_L += np.random.normal(0, self.sigma_test/255.0, img_L.shape)
91
+
92
+ # --------------------------------
93
+ # HWC to CHW, numpy to tensor
94
+ # --------------------------------
95
+ img_L = util.single2tensor3(img_L)
96
+ img_H = util.single2tensor3(img_H)
97
+
98
+ return {'L': img_L, 'H': img_H, 'H_path': H_path, 'L_path': L_path}
99
+
100
+ def __len__(self):
101
+ return len(self.paths_H)
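The core of the training branch above is plain additive white Gaussian noise at a fixed sigma; stripped of the dataset plumbing, the synthesis step looks like this (a standalone sketch, not code from the repository):

```python
import torch

sigma = 25                                   # noise level in 8-bit intensity units
img_H = torch.rand(3, 64, 64)                # stand-in for a clean patch in [0, 1]

noise = torch.randn(img_H.size()).mul_(sigma / 255.0)   # AWGN with standard deviation sigma/255
img_L = img_H.clone().add_(noise)            # noisy input; the training target is img_H itself
```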
KAIR/data/dataset_dnpatch.py ADDED
@@ -0,0 +1,133 @@
1
+ import random
2
+ import numpy as np
3
+ import torch
4
+ import torch.utils.data as data
5
+ import utils.utils_image as util
6
+
7
+
8
+ class DatasetDnPatch(data.Dataset):
9
+ """
10
+ # -----------------------------------------
11
+ # Get L/H for denosing on AWGN with fixed sigma.
12
+ # ****Get all H patches first****
13
+ # Only dataroot_H is needed.
14
+ # -----------------------------------------
15
+ # e.g., DnCNN with BSD400
16
+ # -----------------------------------------
17
+ """
18
+
19
+ def __init__(self, opt):
20
+ super(DatasetDnPatch, self).__init__()
21
+ print('Get L/H for denosing on AWGN with fixed sigma. Only dataroot_H is needed.')
22
+ self.opt = opt
23
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
24
+ self.patch_size = opt['H_size'] if opt['H_size'] else 64
25
+
26
+ self.sigma = opt['sigma'] if opt['sigma'] else 25
27
+ self.sigma_test = opt['sigma_test'] if opt['sigma_test'] else self.sigma
28
+
29
+ self.num_patches_per_image = opt['num_patches_per_image'] if opt['num_patches_per_image'] else 40
30
+ self.num_sampled = opt['num_sampled'] if opt['num_sampled'] else 3000
31
+
32
+ # ------------------------------------
33
+ # get paths of H
34
+ # ------------------------------------
35
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
36
+ assert self.paths_H, 'Error: H path is empty.'
37
+
38
+ # ------------------------------------
39
+ # number of sampled H images
40
+ # ------------------------------------
41
+ self.num_sampled = min(self.num_sampled, len(self.paths_H))
42
+
43
+ # ------------------------------------
44
+ # reserve space with zeros
45
+ # ------------------------------------
46
+ self.total_patches = self.num_sampled * self.num_patches_per_image
47
+ self.H_data = np.zeros([self.total_patches, self.patch_size, self.patch_size, self.n_channels], dtype=np.uint8)
48
+
49
+ # ------------------------------------
50
+ # update H patches
51
+ # ------------------------------------
52
+ self.update_data()
53
+
54
+ def update_data(self):
55
+ """
56
+ # ------------------------------------
57
+ # update whole H patches
58
+ # ------------------------------------
59
+ """
60
+ self.index_sampled = random.sample(range(0, len(self.paths_H), 1), self.num_sampled)
61
+ n_count = 0
62
+
63
+ for i in range(len(self.index_sampled)):
64
+ H_patches = self.get_patches(self.index_sampled[i])
65
+ for H_patch in H_patches:
66
+ self.H_data[n_count,:,:,:] = H_patch
67
+ n_count += 1
68
+
69
+ print('Training data updated! Total number of patches is: %5.2f X %5.2f = %5.2f\n' % (len(self.H_data)//128, 128, len(self.H_data)))
70
+
71
+ def get_patches(self, index):
72
+ """
73
+ # ------------------------------------
74
+ # get H patches from an H image
75
+ # ------------------------------------
76
+ """
77
+ H_path = self.paths_H[index]
78
+ img_H = util.imread_uint(H_path, self.n_channels) # uint format
79
+
80
+ H, W = img_H.shape[:2]
81
+
82
+ H_patches = []
83
+
84
+ num = self.num_patches_per_image
85
+ for _ in range(num):
86
+ rnd_h = random.randint(0, max(0, H - self.patch_size))
87
+ rnd_w = random.randint(0, max(0, W - self.patch_size))
88
+ H_patch = img_H[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size, :]
89
+ H_patches.append(H_patch)
90
+
91
+ return H_patches
92
+
93
+ def __getitem__(self, index):
94
+
95
+ H_path = 'toy.png'
96
+ if self.opt['phase'] == 'train':
97
+
98
+ patch_H = self.H_data[index]
99
+
100
+ # --------------------------------
101
+ # augmentation - flip and/or rotate
102
+ # --------------------------------
103
+ mode = random.randint(0, 7)
104
+ patch_H = util.augment_img(patch_H, mode=mode)
105
+
106
+ patch_H = util.uint2tensor3(patch_H)
107
+ patch_L = patch_H.clone()
108
+
109
+ # ------------------------------------
110
+ # add noise
111
+ # ------------------------------------
112
+ noise = torch.randn(patch_L.size()).mul_(self.sigma/255.0)
113
+ patch_L.add_(noise)
114
+
115
+ else:
116
+
117
+ H_path = self.paths_H[index]
118
+ img_H = util.imread_uint(H_path, self.n_channels)
119
+ img_H = util.uint2single(img_H)
120
+ img_L = np.copy(img_H)
121
+
122
+ # ------------------------------------
123
+ # add noise
124
+ # ------------------------------------
125
+ np.random.seed(seed=0)
126
+ img_L += np.random.normal(0, self.sigma_test/255.0, img_L.shape)
127
+ patch_L, patch_H = util.single2tensor3(img_L), util.single2tensor3(img_H)
128
+
129
+ L_path = H_path
130
+ return {'L': patch_L, 'H': patch_H, 'L_path': L_path, 'H_path': H_path}
131
+
132
+ def __len__(self):
133
+ return len(self.H_data)
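Unlike `DatasetDnCNN`, this class pre-extracts all H patches into memory (`self.H_data`) and exposes `update_data()` to resample them. A sketch of how a training loop might refresh the patch pool between epochs (illustrative only; whether and when to call it is up to the training script):

```python
# Sketch only: train_set is assumed to be a DatasetDnPatch instance and
# train_loader a DataLoader wrapped around it, as in the earlier example.
num_epochs = 100
for epoch in range(num_epochs):
    for batch in train_loader:
        pass                       # forward/backward on batch['L'], batch['H'] goes here
    train_set.update_data()        # resample self.H_data so the next epoch sees new patches
```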
KAIR/data/dataset_dpsr.py ADDED
@@ -0,0 +1,131 @@
1
+ import random
2
+ import numpy as np
3
+ import torch
4
+ import torch.utils.data as data
5
+ import utils.utils_image as util
6
+
7
+
8
+ class DatasetDPSR(data.Dataset):
9
+ '''
10
+ # -----------------------------------------
11
+ # Get L/H/M for noisy image SR.
12
+ # Only "paths_H" is needed, sythesize bicubicly downsampled L on-the-fly.
13
+ # -----------------------------------------
14
+ # e.g., SRResNet super-resolver prior for DPSR
15
+ # -----------------------------------------
16
+ '''
17
+
18
+ def __init__(self, opt):
19
+ super(DatasetDPSR, self).__init__()
20
+ self.opt = opt
21
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
22
+ self.sf = opt['scale'] if opt['scale'] else 4
23
+ self.patch_size = self.opt['H_size'] if self.opt['H_size'] else 96
24
+ self.L_size = self.patch_size // self.sf
25
+ self.sigma = opt['sigma'] if opt['sigma'] else [0, 50]
26
+ self.sigma_min, self.sigma_max = self.sigma[0], self.sigma[1]
27
+ self.sigma_test = opt['sigma_test'] if opt['sigma_test'] else 0
28
+
29
+ # ------------------------------------
30
+ # get paths of L/H
31
+ # ------------------------------------
32
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
33
+ self.paths_L = util.get_image_paths(opt['dataroot_L'])
34
+
35
+ assert self.paths_H, 'Error: H path is empty.'
36
+
37
+ def __getitem__(self, index):
38
+
39
+ # ------------------------------------
40
+ # get H image
41
+ # ------------------------------------
42
+ H_path = self.paths_H[index]
43
+ img_H = util.imread_uint(H_path, self.n_channels)
44
+ img_H = util.uint2single(img_H)
45
+
46
+ # ------------------------------------
47
+ # modcrop for SR
48
+ # ------------------------------------
49
+ img_H = util.modcrop(img_H, self.sf)
50
+
51
+ # ------------------------------------
52
+ # sythesize L image via matlab's bicubic
53
+ # ------------------------------------
54
+ H, W, _ = img_H.shape
55
+ img_L = util.imresize_np(img_H, 1 / self.sf, True)
56
+
57
+ if self.opt['phase'] == 'train':
58
+ """
59
+ # --------------------------------
60
+ # get L/H patch pairs
61
+ # --------------------------------
62
+ """
63
+ H, W, C = img_L.shape
64
+
65
+ # --------------------------------
66
+ # randomly crop L patch
67
+ # --------------------------------
68
+ rnd_h = random.randint(0, max(0, H - self.L_size))
69
+ rnd_w = random.randint(0, max(0, W - self.L_size))
70
+ img_L = img_L[rnd_h:rnd_h + self.L_size, rnd_w:rnd_w + self.L_size, :]
71
+
72
+ # --------------------------------
73
+ # crop corresponding H patch
74
+ # --------------------------------
75
+ rnd_h_H, rnd_w_H = int(rnd_h * self.sf), int(rnd_w * self.sf)
76
+ img_H = img_H[rnd_h_H:rnd_h_H + self.patch_size, rnd_w_H:rnd_w_H + self.patch_size, :]
77
+
78
+ # --------------------------------
79
+ # augmentation - flip and/or rotate
80
+ # --------------------------------
81
+ mode = random.randint(0, 7)
82
+ img_L, img_H = util.augment_img(img_L, mode=mode), util.augment_img(img_H, mode=mode)
83
+
84
+ # --------------------------------
85
+ # get patch pairs
86
+ # --------------------------------
87
+ img_H, img_L = util.single2tensor3(img_H), util.single2tensor3(img_L)
88
+
89
+ # --------------------------------
90
+ # select noise level and get Gaussian noise
91
+ # --------------------------------
92
+ if random.random() < 0.1:
93
+ noise_level = torch.zeros(1).float()
94
+ else:
95
+ noise_level = torch.FloatTensor([np.random.uniform(self.sigma_min, self.sigma_max)])/255.0
96
+ # noise_level = torch.rand(1)*50/255.0
97
+ # noise_level = torch.min(torch.from_numpy(np.float32([7*np.random.chisquare(2.5)/255.0])),torch.Tensor([50./255.]))
98
+
99
+ else:
100
+
101
+ img_H, img_L = util.single2tensor3(img_H), util.single2tensor3(img_L)
102
+
103
+ noise_level = torch.FloatTensor([self.sigma_test])
104
+
105
+ # ------------------------------------
106
+ # add noise
107
+ # ------------------------------------
108
+ noise = torch.randn(img_L.size()).mul_(noise_level).float()
109
+ img_L.add_(noise)
110
+
111
+ # ------------------------------------
112
+ # get noise level map M
113
+ # ------------------------------------
114
+ M_vector = noise_level.unsqueeze(1).unsqueeze(1)
115
+ M = M_vector.repeat(1, img_L.size()[-2], img_L.size()[-1])
116
+
117
+
118
+ """
119
+ # -------------------------------------
120
+ # concat L and noise level map M
121
+ # -------------------------------------
122
+ """
123
+ img_L = torch.cat((img_L, M), 0)
124
+
125
+
126
+ L_path = H_path
127
+
128
+ return {'L': img_L, 'H': img_H, 'L_path': L_path, 'H_path': H_path}
129
+
130
+ def __len__(self):
131
+ return len(self.paths_H)
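The distinctive part of this dataset is the noise level map M: a single scalar sigma is broadcast to a 1xHxW plane and concatenated to the low-resolution image, so the network sees a (C+1)-channel input. Reduced to its essentials (standalone sketch, values illustrative):

```python
import torch

img_L = torch.rand(3, 24, 24)                        # low-resolution RGB patch
noise_level = torch.FloatTensor([15.0 / 255.0])      # sigma scaled to [0, 1]

img_L = img_L + torch.randn(img_L.size()) * noise_level           # add AWGN at that sigma
M = noise_level.unsqueeze(1).unsqueeze(1).repeat(1, img_L.size(-2), img_L.size(-1))
model_input = torch.cat((img_L, M), 0)               # shape (4, 24, 24): RGB plus noise map
```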
KAIR/data/dataset_fdncnn.py ADDED
@@ -0,0 +1,109 @@
1
+ import random
2
+ import numpy as np
3
+ import torch
4
+ import torch.utils.data as data
5
+ import utils.utils_image as util
6
+
7
+
8
+ class DatasetFDnCNN(data.Dataset):
9
+ """
10
+ # -----------------------------------------
11
+ # Get L/H/M for denosing on AWGN with a range of sigma.
12
+ # Only dataroot_H is needed.
13
+ # -----------------------------------------
14
+ # e.g., FDnCNN, H = f(cat(L, M)), M is noise level map
15
+ # -----------------------------------------
16
+ """
17
+
18
+ def __init__(self, opt):
19
+ super(DatasetFDnCNN, self).__init__()
20
+ self.opt = opt
21
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
22
+ self.patch_size = self.opt['H_size'] if opt['H_size'] else 64
23
+ self.sigma = opt['sigma'] if opt['sigma'] else [0, 75]
24
+ self.sigma_min, self.sigma_max = self.sigma[0], self.sigma[1]
25
+ self.sigma_test = opt['sigma_test'] if opt['sigma_test'] else 25
26
+
27
+ # -------------------------------------
28
+ # get the path of H, return None if input is None
29
+ # -------------------------------------
30
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
31
+
32
+ def __getitem__(self, index):
33
+ # -------------------------------------
34
+ # get H image
35
+ # -------------------------------------
36
+ H_path = self.paths_H[index]
37
+ img_H = util.imread_uint(H_path, self.n_channels)
38
+
39
+ L_path = H_path
40
+
41
+ if self.opt['phase'] == 'train':
42
+ """
43
+ # --------------------------------
44
+ # get L/H/M patch pairs
45
+ # --------------------------------
46
+ """
47
+ H, W = img_H.shape[:2]
48
+
49
+ # ---------------------------------
50
+ # randomly crop the patch
51
+ # ---------------------------------
52
+ rnd_h = random.randint(0, max(0, H - self.patch_size))
53
+ rnd_w = random.randint(0, max(0, W - self.patch_size))
54
+ patch_H = img_H[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size, :]
55
+
56
+ # ---------------------------------
57
+ # augmentation - flip, rotate
58
+ # ---------------------------------
59
+ mode = random.randint(0, 7)
60
+ patch_H = util.augment_img(patch_H, mode=mode)
61
+
62
+ # ---------------------------------
63
+ # HWC to CHW, numpy(uint) to tensor
64
+ # ---------------------------------
65
+ img_H = util.uint2tensor3(patch_H)
66
+ img_L = img_H.clone()
67
+
68
+ # ---------------------------------
69
+ # get noise level
70
+ # ---------------------------------
71
+ # noise_level = torch.FloatTensor([np.random.randint(self.sigma_min, self.sigma_max)])/255.0
72
+ noise_level = torch.FloatTensor([np.random.uniform(self.sigma_min, self.sigma_max)])/255.0
73
+
74
+ noise_level_map = torch.ones((1, img_L.size(1), img_L.size(2))).mul_(noise_level).float() # torch.full((1, img_L.size(1), img_L.size(2)), noise_level)
75
+
76
+ # ---------------------------------
77
+ # add noise
78
+ # ---------------------------------
79
+ noise = torch.randn(img_L.size()).mul_(noise_level).float()
80
+ img_L.add_(noise)
81
+
82
+ else:
83
+ """
84
+ # --------------------------------
85
+ # get L/H/M image pairs
86
+ # --------------------------------
87
+ """
88
+ img_H = util.uint2single(img_H)
89
+ img_L = np.copy(img_H)
90
+ np.random.seed(seed=0)
91
+ img_L += np.random.normal(0, self.sigma_test/255.0, img_L.shape)
92
+ noise_level_map = torch.ones((1, img_L.shape[0], img_L.shape[1])).mul_(self.sigma_test/255.0).float() # torch.full((1, img_L.size(1), img_L.size(2)), noise_level)
93
+
94
+ # ---------------------------------
95
+ # L/H image pairs
96
+ # ---------------------------------
97
+ img_H, img_L = util.single2tensor3(img_H), util.single2tensor3(img_L)
98
+
99
+ """
100
+ # -------------------------------------
101
+ # concat L and noise level map M
102
+ # -------------------------------------
103
+ """
104
+ img_L = torch.cat((img_L, noise_level_map), 0)
105
+
106
+ return {'L': img_L, 'H': img_H, 'L_path': L_path, 'H_path': H_path}
107
+
108
+ def __len__(self):
109
+ return len(self.paths_H)
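Because the noise level map is concatenated onto L, a sample drawn from this dataset carries both signals in one tensor; splitting them back out looks like this (sketch, assuming a 3-channel colour image and `dataset` being an instance of `DatasetFDnCNN`):

```python
sample = dataset[0]            # one item from DatasetFDnCNN (assumed to exist)
img_and_map = sample['L']      # shape (4, H, W): 3 image channels + 1 noise-level plane
noisy_img = img_and_map[:3]    # the noisy observation
noise_map = img_and_map[3:]    # constant plane holding sigma / 255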
KAIR/data/dataset_ffdnet.py ADDED
@@ -0,0 +1,103 @@
1
+ import random
2
+ import numpy as np
3
+ import torch
4
+ import torch.utils.data as data
5
+ import utils.utils_image as util
6
+
7
+
8
+ class DatasetFFDNet(data.Dataset):
9
+ """
10
+ # -----------------------------------------
11
+ # Get L/H/M for denosing on AWGN with a range of sigma.
12
+ # Only dataroot_H is needed.
13
+ # -----------------------------------------
14
+ # e.g., FFDNet, H = f(L, sigma), sigma is noise level
15
+ # -----------------------------------------
16
+ """
17
+
18
+ def __init__(self, opt):
19
+ super(DatasetFFDNet, self).__init__()
20
+ self.opt = opt
21
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
22
+ self.patch_size = self.opt['H_size'] if opt['H_size'] else 64
23
+ self.sigma = opt['sigma'] if opt['sigma'] else [0, 75]
24
+ self.sigma_min, self.sigma_max = self.sigma[0], self.sigma[1]
25
+ self.sigma_test = opt['sigma_test'] if opt['sigma_test'] else 25
26
+
27
+ # -------------------------------------
28
+ # get the path of H, return None if input is None
29
+ # -------------------------------------
30
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
31
+
32
+ def __getitem__(self, index):
33
+ # -------------------------------------
34
+ # get H image
35
+ # -------------------------------------
36
+ H_path = self.paths_H[index]
37
+ img_H = util.imread_uint(H_path, self.n_channels)
38
+
39
+ L_path = H_path
40
+
41
+ if self.opt['phase'] == 'train':
42
+ """
43
+ # --------------------------------
44
+ # get L/H/M patch pairs
45
+ # --------------------------------
46
+ """
47
+ H, W = img_H.shape[:2]
48
+
49
+ # ---------------------------------
50
+ # randomly crop the patch
51
+ # ---------------------------------
52
+ rnd_h = random.randint(0, max(0, H - self.patch_size))
53
+ rnd_w = random.randint(0, max(0, W - self.patch_size))
54
+ patch_H = img_H[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size, :]
55
+
56
+ # ---------------------------------
57
+ # augmentation - flip, rotate
58
+ # ---------------------------------
59
+ mode = random.randint(0, 7)
60
+ patch_H = util.augment_img(patch_H, mode=mode)
61
+
62
+ # ---------------------------------
63
+ # HWC to CHW, numpy(uint) to tensor
64
+ # ---------------------------------
65
+ img_H = util.uint2tensor3(patch_H)
66
+ img_L = img_H.clone()
67
+
68
+ # ---------------------------------
69
+ # get noise level
70
+ # ---------------------------------
71
+ # noise_level = torch.FloatTensor([np.random.randint(self.sigma_min, self.sigma_max)])/255.0
72
+ noise_level = torch.FloatTensor([np.random.uniform(self.sigma_min, self.sigma_max)])/255.0
73
+
74
+ # ---------------------------------
75
+ # add noise
76
+ # ---------------------------------
77
+ noise = torch.randn(img_L.size()).mul_(noise_level).float()
78
+ img_L.add_(noise)
79
+
80
+ else:
81
+ """
82
+ # --------------------------------
83
+ # get L/H/sigma image pairs
84
+ # --------------------------------
85
+ """
86
+ img_H = util.uint2single(img_H)
87
+ img_L = np.copy(img_H)
88
+ np.random.seed(seed=0)
89
+ img_L += np.random.normal(0, self.sigma_test/255.0, img_L.shape)
90
+ noise_level = torch.FloatTensor([self.sigma_test/255.0])
91
+
92
+ # ---------------------------------
93
+ # L/H image pairs
94
+ # ---------------------------------
95
+ img_H, img_L = util.single2tensor3(img_H), util.single2tensor3(img_L)
96
+
97
+ noise_level = noise_level.unsqueeze(1).unsqueeze(1)
98
+
99
+
100
+ return {'L': img_L, 'H': img_H, 'C': noise_level, 'L_path': L_path, 'H_path': H_path}
101
+
102
+ def __len__(self):
103
+ return len(self.paths_H)
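In contrast to FDnCNN, FFDNet's dataset returns the noise level as a separate entry `'C'` rather than concatenating a map, leaving the expansion to the model. A sketch of what a batch looks like (key names come from the return dict above; the loader itself is assumed):

```python
# loader: a DataLoader wrapped around an instance of DatasetFFDNet (assumption).
batch = next(iter(loader))
img_L = batch['L']    # (B, 3, H, W) noisy patches
img_H = batch['H']    # (B, 3, H, W) clean targets
sigma = batch['C']    # (B, 1) in training; (B, 1, 1, 1) in testing, where it is unsqueezed
```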
KAIR/data/dataset_jpeg.py ADDED
@@ -0,0 +1,118 @@
1
+ import random
2
+ import torch.utils.data as data
3
+ import utils.utils_image as util
4
+ import cv2
5
+
6
+
7
+ class DatasetJPEG(data.Dataset):
8
+ def __init__(self, opt):
9
+ super(DatasetJPEG, self).__init__()
10
+ print('Dataset: JPEG compression artifact reduction (deblocking) with quality factor. Only dataroot_H is needed.')
11
+ self.opt = opt
12
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
13
+ self.patch_size = self.opt['H_size'] if opt['H_size'] else 128
14
+
15
+ self.quality_factor = opt['quality_factor'] if opt['quality_factor'] else 40
16
+ self.quality_factor_test = opt['quality_factor_test'] if opt['quality_factor_test'] else 40
17
+ self.is_color = opt['is_color'] if opt['is_color'] else False
18
+
19
+ # -------------------------------------
20
+ # get the path of H, return None if input is None
21
+ # -------------------------------------
22
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
23
+
24
+ def __getitem__(self, index):
25
+
26
+ if self.opt['phase'] == 'train':
27
+ # -------------------------------------
28
+ # get H image
29
+ # -------------------------------------
30
+ H_path = self.paths_H[index]
31
+ img_H = util.imread_uint(H_path, 3)
32
+ L_path = H_path
33
+
34
+ H, W = img_H.shape[:2]
35
+ self.patch_size_plus = self.patch_size + 8
36
+
37
+ # ---------------------------------
38
+ # randomly crop a large patch
39
+ # ---------------------------------
40
+ rnd_h = random.randint(0, max(0, H - self.patch_size_plus))
41
+ rnd_w = random.randint(0, max(0, W - self.patch_size_plus))
42
+ patch_H = img_H[rnd_h:rnd_h + self.patch_size_plus, rnd_w:rnd_w + self.patch_size_plus, ...]
43
+
44
+ # ---------------------------------
45
+ # augmentation - flip, rotate
46
+ # ---------------------------------
47
+ mode = random.randint(0, 7)
48
+ patch_H = util.augment_img(patch_H, mode=mode)
49
+
50
+ # ---------------------------------
51
+ # HWC to CHW, numpy(uint) to tensor
52
+ # ---------------------------------
53
+ img_L = patch_H.copy()
54
+
55
+ # ---------------------------------
56
+ # set quality factor
57
+ # ---------------------------------
58
+ quality_factor = self.quality_factor
59
+
60
+ if self.is_color: # color image
61
+ img_H = img_L.copy()
62
+ img_L = cv2.cvtColor(img_L, cv2.COLOR_RGB2BGR)
63
+ result, encimg = cv2.imencode('.jpg', img_L, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
64
+ img_L = cv2.imdecode(encimg, 1)
65
+ img_L = cv2.cvtColor(img_L, cv2.COLOR_BGR2RGB)
66
+ else:
67
+ if random.random() > 0.5:
68
+ img_L = util.rgb2ycbcr(img_L)
69
+ else:
70
+ img_L = cv2.cvtColor(img_L, cv2.COLOR_RGB2GRAY)
71
+ img_H = img_L.copy()
72
+ result, encimg = cv2.imencode('.jpg', img_L, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
73
+ img_L = cv2.imdecode(encimg, 0)
74
+
75
+ # ---------------------------------
76
+ # randomly crop a patch
77
+ # ---------------------------------
78
+ H, W = img_H.shape[:2]
79
+ if random.random() > 0.5:
80
+ rnd_h = random.randint(0, max(0, H - self.patch_size))
81
+ rnd_w = random.randint(0, max(0, W - self.patch_size))
82
+ else:
83
+ rnd_h = 0
84
+ rnd_w = 0
85
+ img_H = img_H[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size]
86
+ img_L = img_L[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size]
87
+ else:
88
+
89
+ H_path = self.paths_H[index]
90
+ L_path = H_path
91
+ # ---------------------------------
92
+ # set quality factor
93
+ # ---------------------------------
94
+ quality_factor = self.quality_factor_test
95
+
96
+ if self.is_color: # color JPEG image deblocking
97
+ img_H = util.imread_uint(H_path, 3)
98
+ img_L = img_H.copy()
99
+ img_L = cv2.cvtColor(img_L, cv2.COLOR_RGB2BGR)
100
+ result, encimg = cv2.imencode('.jpg', img_L, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
101
+ img_L = cv2.imdecode(encimg, 1)
102
+ img_L = cv2.cvtColor(img_L, cv2.COLOR_BGR2RGB)
103
+ else:
104
+ img_H = cv2.imread(H_path, cv2.IMREAD_UNCHANGED)
105
+ is_to_ycbcr = True if img_L.ndim == 3 else False
106
+ if is_to_ycbcr:
107
+ img_H = cv2.cvtColor(img_H, cv2.COLOR_BGR2RGB)
108
+ img_H = util.rgb2ycbcr(img_H)
109
+
110
+ result, encimg = cv2.imencode('.jpg', img_H, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
111
+ img_L = cv2.imdecode(encimg, 0)
112
+
113
+ img_L, img_H = util.uint2tensor3(img_L), util.uint2tensor3(img_H)
114
+
115
+ return {'L': img_L, 'H': img_H, 'L_path': L_path, 'H_path': H_path}
116
+
117
+ def __len__(self):
118
+ return len(self.paths_H)
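The degradation here is a real JPEG encode/decode round trip through OpenCV rather than a simulated one; in isolation, the core step is (standalone sketch with a random stand-in image):

```python
import cv2
import numpy as np

img = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)   # stand-in for an RGB patch
quality_factor = 40

bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)                        # OpenCV expects BGR ordering
ok, encoded = cv2.imencode('.jpg', bgr, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
degraded = cv2.cvtColor(cv2.imdecode(encoded, 1), cv2.COLOR_BGR2RGB)   # blocky JPEG version
```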
KAIR/data/dataset_l.py ADDED
@@ -0,0 +1,43 @@
+ import torch.utils.data as data
+ import utils.utils_image as util
+
+
+ class DatasetL(data.Dataset):
+     '''
+     # -----------------------------------------
+     # Get L in testing.
+     # Only "dataroot_L" is needed.
+     # -----------------------------------------
+     # -----------------------------------------
+     '''
+
+     def __init__(self, opt):
+         super(DatasetL, self).__init__()
+         print('Read L in testing. Only "dataroot_L" is needed.')
+         self.opt = opt
+         self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
+
+         # ------------------------------------
+         # get the path of L
+         # ------------------------------------
+         self.paths_L = util.get_image_paths(opt['dataroot_L'])
+         assert self.paths_L, 'Error: L paths are empty.'
+
+     def __getitem__(self, index):
+         L_path = None
+
+         # ------------------------------------
+         # get L image
+         # ------------------------------------
+         L_path = self.paths_L[index]
+         img_L = util.imread_uint(L_path, self.n_channels)
+
+         # ------------------------------------
+         # HWC to CHW, numpy to tensor
+         # ------------------------------------
+         img_L = util.uint2tensor3(img_L)
+
+         return {'L': img_L, 'L_path': L_path}
+
+     def __len__(self):
+         return len(self.paths_L)
KAIR/data/dataset_plain.py ADDED
@@ -0,0 +1,85 @@
1
+ import random
2
+ import numpy as np
3
+ import torch.utils.data as data
4
+ import utils.utils_image as util
5
+
6
+
7
+ class DatasetPlain(data.Dataset):
8
+ '''
9
+ # -----------------------------------------
10
+ # Get L/H for image-to-image mapping.
11
+ # Both "paths_L" and "paths_H" are needed.
12
+ # -----------------------------------------
13
+ # e.g., train denoiser with L and H
14
+ # -----------------------------------------
15
+ '''
16
+
17
+ def __init__(self, opt):
18
+ super(DatasetPlain, self).__init__()
19
+ print('Get L/H for image-to-image mapping. Both "paths_L" and "paths_H" are needed.')
20
+ self.opt = opt
21
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
22
+ self.patch_size = self.opt['H_size'] if self.opt['H_size'] else 64
23
+
24
+ # ------------------------------------
25
+ # get the path of L/H
26
+ # ------------------------------------
27
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
28
+ self.paths_L = util.get_image_paths(opt['dataroot_L'])
29
+
30
+ assert self.paths_H, 'Error: H path is empty.'
31
+ assert self.paths_L, 'Error: L path is empty. Plain dataset assumes both L and H are given!'
32
+ if self.paths_L and self.paths_H:
33
+ assert len(self.paths_L) == len(self.paths_H), 'L/H mismatch - {}, {}.'.format(len(self.paths_L), len(self.paths_H))
34
+
35
+ def __getitem__(self, index):
36
+
37
+ # ------------------------------------
38
+ # get H image
39
+ # ------------------------------------
40
+ H_path = self.paths_H[index]
41
+ img_H = util.imread_uint(H_path, self.n_channels)
42
+
43
+ # ------------------------------------
44
+ # get L image
45
+ # ------------------------------------
46
+ L_path = self.paths_L[index]
47
+ img_L = util.imread_uint(L_path, self.n_channels)
48
+
49
+ # ------------------------------------
50
+ # if train, get L/H patch pair
51
+ # ------------------------------------
52
+ if self.opt['phase'] == 'train':
53
+
54
+ H, W, _ = img_H.shape
55
+
56
+ # --------------------------------
57
+ # randomly crop the patch
58
+ # --------------------------------
59
+ rnd_h = random.randint(0, max(0, H - self.patch_size))
60
+ rnd_w = random.randint(0, max(0, W - self.patch_size))
61
+ patch_L = img_L[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size, :]
62
+ patch_H = img_H[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size, :]
63
+
64
+ # --------------------------------
65
+ # augmentation - flip and/or rotate
66
+ # --------------------------------
67
+ mode = random.randint(0, 7)
68
+ patch_L, patch_H = util.augment_img(patch_L, mode=mode), util.augment_img(patch_H, mode=mode)
69
+
70
+ # --------------------------------
71
+ # HWC to CHW, numpy(uint) to tensor
72
+ # --------------------------------
73
+ img_L, img_H = util.uint2tensor3(patch_L), util.uint2tensor3(patch_H)
74
+
75
+ else:
76
+
77
+ # --------------------------------
78
+ # HWC to CHW, numpy(uint) to tensor
79
+ # --------------------------------
80
+ img_L, img_H = util.uint2tensor3(img_L), util.uint2tensor3(img_H)
81
+
82
+ return {'L': img_L, 'H': img_H, 'L_path': L_path, 'H_path': H_path}
83
+
84
+ def __len__(self):
85
+ return len(self.paths_H)
KAIR/data/dataset_plainpatch.py ADDED
@@ -0,0 +1,131 @@
1
+ import os.path
2
+ import random
3
+ import numpy as np
4
+ import torch.utils.data as data
5
+ import utils.utils_image as util
6
+
7
+
8
+
9
+ class DatasetPlainPatch(data.Dataset):
10
+ '''
11
+ # -----------------------------------------
12
+ # Get L/H for image-to-image mapping.
13
+ # Both "paths_L" and "paths_H" are needed.
14
+ # -----------------------------------------
15
+ # e.g., train denoiser with L and H patches
16
+ # create a large patch dataset first
17
+ # -----------------------------------------
18
+ '''
19
+
20
+ def __init__(self, opt):
21
+ super(DatasetPlainPatch, self).__init__()
22
+ print('Get L/H for image-to-image mapping. Both "paths_L" and "paths_H" are needed.')
23
+ self.opt = opt
24
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
25
+ self.patch_size = self.opt['H_size'] if self.opt['H_size'] else 64
26
+
27
+ self.num_patches_per_image = opt['num_patches_per_image'] if opt['num_patches_per_image'] else 40
28
+ self.num_sampled = opt['num_sampled'] if opt['num_sampled'] else 3000
29
+
30
+ # -------------------
31
+ # get the path of L/H
32
+ # -------------------
33
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
34
+ self.paths_L = util.get_image_paths(opt['dataroot_L'])
35
+
36
+ assert self.paths_H, 'Error: H path is empty.'
37
+ assert self.paths_L, 'Error: L path is empty. This dataset requires an L path; use dataset_dnpatch if only H images are available.'
38
+ if self.paths_L and self.paths_H:
39
+ assert len(self.paths_L) == len(self.paths_H), 'H and L datasets have different number of images - {}, {}.'.format(len(self.paths_L), len(self.paths_H))
40
+
41
+ # ------------------------------------
42
+ # number of sampled images
43
+ # ------------------------------------
44
+ self.num_sampled = min(self.num_sampled, len(self.paths_H))
45
+
46
+ # ------------------------------------
47
+ # reserve space with zeros
48
+ # ------------------------------------
49
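+ # all sampled patches are cached in RAM as uint8 arrays; memory use is roughly
+ # 2 * total_patches * patch_size^2 * n_channels bytes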
+ self.total_patches = self.num_sampled * self.num_patches_per_image
50
+ self.H_data = np.zeros([self.total_patches, self.patch_size, self.patch_size, self.n_channels], dtype=np.uint8)
51
+ self.L_data = np.zeros([self.total_patches, self.patch_size, self.patch_size, self.n_channels], dtype=np.uint8)
52
+
53
+ # ------------------------------------
54
+ # update H patches
55
+ # ------------------------------------
56
+ self.update_data()
57
+
58
+
59
+ def update_data(self):
60
+ """
61
+ # ------------------------------------
62
+ # update whole L/H patches
63
+ # ------------------------------------
64
+ """
65
+ self.index_sampled = random.sample(range(0, len(self.paths_H), 1), self.num_sampled)
66
+ n_count = 0
67
+
68
+ for i in range(len(self.index_sampled)):
69
+ L_patches, H_patches = self.get_patches(self.index_sampled[i])
70
+ for (L_patch, H_patch) in zip(L_patches, H_patches):
71
+ self.L_data[n_count,:,:,:] = L_patch
72
+ self.H_data[n_count,:,:,:] = H_patch
73
+ n_count += 1
74
+
75
+ print('Training data updated! Total number of patches is: %d\n' % len(self.H_data))
76
+
77
+ def get_patches(self, index):
78
+ """
79
+ # ------------------------------------
80
+ # get L/H patches from L/H images
81
+ # ------------------------------------
82
+ """
83
+ L_path = self.paths_L[index]
84
+ H_path = self.paths_H[index]
85
+ img_L = util.imread_uint(L_path, self.n_channels) # uint format
86
+ img_H = util.imread_uint(H_path, self.n_channels) # uint format
87
+
88
+ H, W = img_H.shape[:2]
89
+
90
+ L_patches, H_patches = [], []
91
+
92
+ num = self.num_patches_per_image
93
+ for _ in range(num):
94
+ rnd_h = random.randint(0, max(0, H - self.patch_size))
95
+ rnd_w = random.randint(0, max(0, W - self.patch_size))
96
+ L_patch = img_L[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size, :]
97
+ H_patch = img_H[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size, :]
98
+ L_patches.append(L_patch)
99
+ H_patches.append(H_patch)
100
+
101
+ return L_patches, H_patches
102
+
103
+ def __getitem__(self, index):
104
+
105
+ if self.opt['phase'] == 'train':
106
+
107
+ patch_L, patch_H = self.L_data[index], self.H_data[index]
108
+
109
+ # --------------------------------
110
+ # augmentation - flip and/or rotate
111
+ # --------------------------------
112
+ mode = random.randint(0, 7)
113
+ patch_L = util.augment_img(patch_L, mode=mode)
114
+ patch_H = util.augment_img(patch_H, mode=mode)
115
+
116
+ patch_L, patch_H = util.uint2tensor3(patch_L), util.uint2tensor3(patch_H)
117
+
118
+ else:
119
+
120
+ L_path, H_path = self.paths_L[index], self.paths_H[index]
121
+ patch_L = util.imread_uint(L_path, self.n_channels)
122
+ patch_H = util.imread_uint(H_path, self.n_channels)
123
+
124
+ patch_L, patch_H = util.uint2tensor3(patch_L), util.uint2tensor3(patch_H)
125
+
126
+ return {'L': patch_L, 'H': patch_H}
127
+
128
+
129
+ def __len__(self):
130
+
131
+ return self.total_patches
KAIR/data/dataset_sr.py ADDED
@@ -0,0 +1,197 @@
1
+ import math
2
+ import numpy as np
3
+ import random
4
+ import torch
5
+ import torch.utils.data as data
6
+ import utils.utils_image as util
7
+ from basicsr.data.degradations import circular_lowpass_kernel, random_mixed_kernels
8
+ from basicsr.utils import DiffJPEG, USMSharp
9
+ from numpy.typing import NDArray
10
+ from PIL import Image
11
+ from utils.utils_video import img2tensor
12
+ from torch import Tensor
13
+
14
+ from data.degradations import apply_real_esrgan_degradations
15
+
16
+ class DatasetSR(data.Dataset):
17
+ '''
18
+ # -----------------------------------------
19
+ # Get L/H for SISR.
20
+ # If only "paths_H" is provided, synthesize bicubically downsampled L on-the-fly.
21
+ # -----------------------------------------
22
+ # e.g., SRResNet
23
+ # -----------------------------------------
24
+ '''
25
+
26
+ def __init__(self, opt):
27
+ super(DatasetSR, self).__init__()
28
+ self.opt = opt
29
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
30
+ self.sf = opt['scale'] if opt['scale'] else 4
31
+ self.patch_size = self.opt['H_size'] if self.opt['H_size'] else 96
32
+ self.L_size = self.patch_size // self.sf
33
+
34
+ # ------------------------------------
35
+ # get paths of L/H
36
+ # ------------------------------------
37
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
38
+ self.paths_L = util.get_image_paths(opt['dataroot_L'])
39
+
40
+ assert self.paths_H, 'Error: H path is empty.'
41
+ if self.paths_L and self.paths_H:
42
+ assert len(self.paths_L) == len(self.paths_H), 'L/H mismatch - {}, {}.'.format(len(self.paths_L), len(self.paths_H))
43
+
44
+ self.jpeg_simulator = DiffJPEG()
45
+ self.usm_sharpener = USMSharp()
46
+
47
+ blur_kernel_list1 = ['iso', 'aniso', 'generalized_iso',
48
+ 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
49
+ blur_kernel_list2 = ['iso', 'aniso', 'generalized_iso',
50
+ 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
51
+ blur_kernel_prob1 = [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
52
+ blur_kernel_prob2 = [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
53
+ kernel_size = 21
54
+ blur_sigma1 = [0.05, 0.2]
55
+ blur_sigma2 = [0.05, 0.1]
56
+ betag_range1 = [0.7, 1.3]
57
+ betag_range2 = [0.7, 1.3]
58
+ betap_range1 = [0.7, 1.3]
59
+ betap_range2 = [0.7, 1.3]
60
+
61
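+ # sample the per-item degradation kernels used by apply_real_esrgan_degradations below:
+ # two random mixed blur kernels plus a sinc kernel (for ringing/overshoot artifacts)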
+ def _decide_kernels(self) -> NDArray:
62
+ blur_kernel1 = random_mixed_kernels(
63
+ self.blur_kernel_list1,
64
+ self.blur_kernel_prob1,
65
+ self.kernel_size,
66
+ self.blur_sigma1,
67
+ self.blur_sigma1, [-math.pi, math.pi],
68
+ self.betag_range1,
69
+ self.betap_range1,
70
+ noise_range=None
71
+ )
72
+ blur_kernel2 = random_mixed_kernels(
73
+ self.blur_kernel_list2,
74
+ self.blur_kernel_prob2,
75
+ self.kernel_size,
76
+ self.blur_sigma2,
77
+ self.blur_sigma2, [-math.pi, math.pi],
78
+ self.betag_range2,
79
+ self.betap_range2,
80
+ noise_range=None
81
+ )
82
+ if self.kernel_size < 13:
83
+ omega_c = np.random.uniform(np.pi / 3, np.pi)
84
+ else:
85
+ omega_c = np.random.uniform(np.pi / 5, np.pi)
86
+ sinc_kernel = circular_lowpass_kernel(omega_c, self.kernel_size, pad_to=21)
87
+ return (blur_kernel1, blur_kernel2, sinc_kernel)
88
+
89
+ def __getitem__(self, index):
90
+
91
+ L_path = None
92
+ # ------------------------------------
93
+ # get H image
94
+ # ------------------------------------
95
+ H_path = self.paths_H[index]
96
+ img_H = util.imread_uint(H_path, self.n_channels)
97
+ img_H = util.uint2single(img_H)
98
+
99
+ # ------------------------------------
100
+ # modcrop
101
+ # ------------------------------------
102
+ img_H = util.modcrop(img_H, self.sf)
103
+
104
+ # ------------------------------------
105
+ # get L image
106
+ # ------------------------------------
107
+ if self.paths_L:
108
+ # --------------------------------
109
+ # directly load L image
110
+ # --------------------------------
111
+ L_path = self.paths_L[index]
112
+ img_L = util.imread_uint(L_path, self.n_channels)
113
+ img_L = util.uint2single(img_L)
114
+
115
+ else:
116
+ # --------------------------------
117
+ # synthesize L image via MATLAB-style bicubic downsampling
118
+ # --------------------------------
119
+ H, W = img_H.shape[:2]
120
+ img_L = util.imresize_np(img_H, 1 / self.sf, True)
121
+
122
+ src_tensor = img2tensor(img_L.copy(), bgr2rgb=False,
123
+ float32=True).unsqueeze(0)
124
+
125
+ blur_kernel1, blur_kernel2, sinc_kernel = self._decide_kernels()
126
+ (img_L_2, sharp_img_L, degraded_img_L) = apply_real_esrgan_degradations(
127
+ src_tensor,
128
+ blur_kernel1=Tensor(blur_kernel1).unsqueeze(0),
129
+ blur_kernel2=Tensor(blur_kernel2).unsqueeze(0),
130
+ second_blur_prob=0.2,
131
+ sinc_kernel=Tensor(sinc_kernel).unsqueeze(0),
132
+ resize_prob1=[0.2, 0.7, 0.1],
133
+ resize_prob2=[0.3, 0.4, 0.3],
134
+ resize_range1=[0.9, 1.1],
135
+ resize_range2=[0.9, 1.1],
136
+ gray_noise_prob1=0.2,
137
+ gray_noise_prob2=0.2,
138
+ gaussian_noise_prob1=0.2,
139
+ gaussian_noise_prob2=0.2,
140
+ noise_range=[0.01, 0.2],
141
+ poisson_scale_range=[0.05, 0.45],
142
+ jpeg_compression_range1=[85, 100],
143
+ jpeg_compression_range2=[85, 100],
144
+ jpeg_simulator=self.jpeg_simulator,
145
+ random_crop_gt_size=256,
146
+ sr_upsample_scale=1,
147
+ usm_sharpener=self.usm_sharpener
148
+ )
149
+ # Image.fromarray((degraded_img_L[0] * 255).permute(
150
+ # 1, 2, 0).cpu().numpy().astype(np.uint8)).save(
151
+ # "/home/cll/Desktop/degraded_L.png")
152
+ # Image.fromarray((img_L * 255).astype(np.uint8)).save(
153
+ # "/home/cll/Desktop/img_L.png")
154
+ # Image.fromarray((img_L_2[0] * 255).permute(
155
+ # 1, 2, 0).cpu().numpy().astype(np.uint8)).save(
156
+ # "/home/cll/Desktop/img_L_2.png")
157
+ # exit()
158
+
159
+ # ------------------------------------
160
+ # if train, get L/H patch pair
161
+ # ------------------------------------
162
+ if self.opt['phase'] == 'train':
163
+
164
+ H, W, C = img_L.shape
165
+
166
+ # --------------------------------
167
+ # randomly crop the L patch
168
+ # --------------------------------
169
+ rnd_h = random.randint(0, max(0, H - self.L_size))
170
+ rnd_w = random.randint(0, max(0, W - self.L_size))
171
+ img_L = img_L[rnd_h:rnd_h + self.L_size, rnd_w:rnd_w + self.L_size, :]
172
+
173
+ # --------------------------------
174
+ # crop corresponding H patch
175
+ # --------------------------------
176
+ rnd_h_H, rnd_w_H = int(rnd_h * self.sf), int(rnd_w * self.sf)
177
+ img_H = img_H[rnd_h_H:rnd_h_H + self.patch_size, rnd_w_H:rnd_w_H + self.patch_size, :]
178
+
179
+ # --------------------------------
180
+ # augmentation - flip and/or rotate + RealESRGAN modified degradations
181
+ # --------------------------------
182
+ mode = random.randint(0, 7)
183
+ img_L, img_H = util.augment_img(img_L, mode=mode), util.augment_img(img_H, mode=mode)
184
+
185
+
186
+ # ------------------------------------
187
+ # L/H pairs, HWC to CHW, numpy to tensor
188
+ # ------------------------------------
189
+ img_H, img_L = util.single2tensor3(img_H), util.single2tensor3(img_L)
190
+
191
+ if L_path is None:
192
+ L_path = H_path
193
+
194
+ return {'L': img_L, 'H': img_H, 'L_path': L_path, 'H_path': H_path}
195
+
196
+ def __len__(self):
197
+ return len(self.paths_H)
KAIR/data/dataset_srmd.py ADDED
@@ -0,0 +1,155 @@
1
+ import random
2
+ import numpy as np
3
+ import torch
4
+ import torch.utils.data as data
5
+ import utils.utils_image as util
6
+ from utils import utils_sisr
7
+
8
+
9
+ import hdf5storage
10
+ import os
11
+
12
+
13
+ class DatasetSRMD(data.Dataset):
14
+ '''
15
+ # -----------------------------------------
16
+ # Get L/H/M for noisy image SR with Gaussian kernels.
17
+ # Only "paths_H" is needed; synthesize bicubically downsampled L on-the-fly.
18
+ # -----------------------------------------
19
+ # e.g., SRMD, H = f(L, kernel, sigma), sigma is noise level
20
+ # -----------------------------------------
21
+ '''
22
+
23
+ def __init__(self, opt):
24
+ super(DatasetSRMD, self).__init__()
25
+ self.opt = opt
26
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
27
+ self.sf = opt['scale'] if opt['scale'] else 4
28
+ self.patch_size = self.opt['H_size'] if self.opt['H_size'] else 96
29
+ self.L_size = self.patch_size // self.sf
30
+ self.sigma = opt['sigma'] if opt['sigma'] else [0, 50]
31
+ self.sigma_min, self.sigma_max = self.sigma[0], self.sigma[1]
32
+ self.sigma_test = opt['sigma_test'] if opt['sigma_test'] else 0
33
+
34
+ # -------------------------------------
35
+ # PCA projection matrix
36
+ # -------------------------------------
37
+ self.p = hdf5storage.loadmat(os.path.join('kernels', 'srmd_pca_pytorch.mat'))['p']
38
+ self.ksize = int(np.sqrt(self.p.shape[-1])) # kernel size
39
+
40
+ # ------------------------------------
41
+ # get paths of L/H
42
+ # ------------------------------------
43
+ self.paths_H = util.get_image_paths(opt['dataroot_H'])
44
+ self.paths_L = util.get_image_paths(opt['dataroot_L'])
45
+
46
+ def __getitem__(self, index):
47
+
48
+ # ------------------------------------
49
+ # get H image
50
+ # ------------------------------------
51
+ H_path = self.paths_H[index]
52
+ img_H = util.imread_uint(H_path, self.n_channels)
53
+ img_H = util.uint2single(img_H)
54
+
55
+ # ------------------------------------
56
+ # modcrop for SR
57
+ # ------------------------------------
58
+ img_H = util.modcrop(img_H, self.sf)
59
+
60
+ # ------------------------------------
61
+ # kernel
62
+ # ------------------------------------
63
+ if self.opt['phase'] == 'train':
64
+ l_max = 10
65
+ theta = np.pi*random.random()
66
+ l1 = 0.1+l_max*random.random()
67
+ l2 = 0.1+(l1-0.1)*random.random()
68
+
69
+ kernel = utils_sisr.anisotropic_Gaussian(ksize=self.ksize, theta=theta, l1=l1, l2=l2)
70
+ else:
71
+ kernel = utils_sisr.anisotropic_Gaussian(ksize=self.ksize, theta=np.pi, l1=0.1, l2=0.1)
72
+
73
+ k = np.reshape(kernel, (-1), order="F")
74
+ k_reduced = np.dot(self.p, k)
75
+ k_reduced = torch.from_numpy(k_reduced).float()
76
+
77
+ # ------------------------------------
78
+ # synthesize L image via the specified degradation model
79
+ # ------------------------------------
80
+ H, W, _ = img_H.shape
81
+ img_L = utils_sisr.srmd_degradation(img_H, kernel, self.sf)
82
+ img_L = np.float32(img_L)
83
+
84
+ if self.opt['phase'] == 'train':
85
+ """
86
+ # --------------------------------
87
+ # get L/H patch pairs
88
+ # --------------------------------
89
+ """
90
+ H, W, C = img_L.shape
91
+
92
+ # --------------------------------
93
+ # randomly crop L patch
94
+ # --------------------------------
95
+ rnd_h = random.randint(0, max(0, H - self.L_size))
96
+ rnd_w = random.randint(0, max(0, W - self.L_size))
97
+ img_L = img_L[rnd_h:rnd_h + self.L_size, rnd_w:rnd_w + self.L_size, :]
98
+
99
+ # --------------------------------
100
+ # crop corresponding H patch
101
+ # --------------------------------
102
+ rnd_h_H, rnd_w_H = int(rnd_h * self.sf), int(rnd_w * self.sf)
103
+ img_H = img_H[rnd_h_H:rnd_h_H + self.patch_size, rnd_w_H:rnd_w_H + self.patch_size, :]
104
+
105
+ # --------------------------------
106
+ # augmentation - flip and/or rotate
107
+ # --------------------------------
108
+ mode = random.randint(0, 7)
109
+ img_L, img_H = util.augment_img(img_L, mode=mode), util.augment_img(img_H, mode=mode)
110
+
111
+ # --------------------------------
112
+ # get patch pairs
113
+ # --------------------------------
114
+ img_H, img_L = util.single2tensor3(img_H), util.single2tensor3(img_L)
115
+
116
+ # --------------------------------
117
+ # select noise level and get Gaussian noise
118
+ # --------------------------------
119
+ if random.random() < 0.1:
120
+ noise_level = torch.zeros(1).float()
121
+ else:
122
+ noise_level = torch.FloatTensor([np.random.uniform(self.sigma_min, self.sigma_max)])/255.0
123
+ # noise_level = torch.rand(1)*50/255.0
124
+ # noise_level = torch.min(torch.from_numpy(np.float32([7*np.random.chisquare(2.5)/255.0])),torch.Tensor([50./255.]))
125
+
126
+ else:
127
+
128
+ img_H, img_L = util.single2tensor3(img_H), util.single2tensor3(img_L)
129
+ noise_level = torch.FloatTensor([self.sigma_test])
130
+
131
+ # ------------------------------------
132
+ # add noise
133
+ # ------------------------------------
134
+ noise = torch.randn(img_L.size()).mul_(noise_level).float()
135
+ img_L.add_(noise)
136
+
137
+ # ------------------------------------
138
+ # get degradation map M
139
+ # ------------------------------------
140
+ M_vector = torch.cat((k_reduced, noise_level), 0).unsqueeze(1).unsqueeze(1)
141
+ M = M_vector.repeat(1, img_L.size()[-2], img_L.size()[-1])
142
+
143
+ """
144
+ # -------------------------------------
145
+ # concat L and noise level map M
146
+ # -------------------------------------
147
+ """
148
+
149
+ img_L = torch.cat((img_L, M), 0)
150
+ L_path = H_path
151
+
152
+ return {'L': img_L, 'H': img_H, 'L_path': L_path, 'H_path': H_path}
153
+
154
+ def __len__(self):
155
+ return len(self.paths_H)
KAIR/data/dataset_usrnet.py ADDED
@@ -0,0 +1,126 @@
1
+ import random
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.utils.data as data
6
+ import utils.utils_image as util
7
+ from utils import utils_deblur
8
+ from utils import utils_sisr
9
+ import os
10
+
11
+ from scipy import ndimage
12
+ from scipy.io import loadmat
13
+ # import hdf5storage
14
+
15
+
16
+ class DatasetUSRNet(data.Dataset):
17
+ '''
18
+ # -----------------------------------------
19
+ # Get L/k/sf/sigma for USRNet.
20
+ # Only "paths_H" and the kernels are needed; L is synthesized on-the-fly.
21
+ # -----------------------------------------
22
+ '''
23
+ def __init__(self, opt):
24
+ super(DatasetUSRNet, self).__init__()
25
+ self.opt = opt
26
+ self.n_channels = opt['n_channels'] if opt['n_channels'] else 3
27
+ self.patch_size = self.opt['H_size'] if self.opt['H_size'] else 96
28
+ self.sigma_max = self.opt['sigma_max'] if self.opt['sigma_max'] is not None else 25
29
+ self.scales = opt['scales'] if opt['scales'] is not None else [1,2,3,4]
30
+ self.sf_validation = opt['sf_validation'] if opt['sf_validation'] is not None else 3
31
+ #self.kernels = hdf5storage.loadmat(os.path.join('kernels', 'kernels_12.mat'))['kernels']
32
+ self.kernels = loadmat(os.path.join('kernels', 'kernels_12.mat'))['kernels'] # for validation
33
+
34
+ # -------------------
35
+ # get the path of H
36
+ # -------------------
37
+ self.paths_H = util.get_image_paths(opt['dataroot_H']) # return None if input is None
38
+ self.count = 0
39
+
40
+ def __getitem__(self, index):
41
+
42
+ # -------------------
43
+ # get H image
44
+ # -------------------
45
+ H_path = self.paths_H[index]
46
+ img_H = util.imread_uint(H_path, self.n_channels)
47
+ L_path = H_path
48
+
49
+ if self.opt['phase'] == 'train':
50
+
51
+ # ---------------------------
52
+ # 1) scale factor, ensure each batch only involves one scale factor
53
+ # ---------------------------
54
+ if self.count % self.opt['dataloader_batch_size'] == 0:
55
+ # sf = random.choice([1,2,3,4])
56
+ self.sf = random.choice(self.scales)
57
+ # self.count = 0 # optional
58
+ self.count += 1
59
+ H, W, _ = img_H.shape
60
+
61
+ # ----------------------------
62
+ # randomly crop the patch
63
+ # ----------------------------
64
+ rnd_h = random.randint(0, max(0, H - self.patch_size))
65
+ rnd_w = random.randint(0, max(0, W - self.patch_size))
66
+ patch_H = img_H[rnd_h:rnd_h + self.patch_size, rnd_w:rnd_w + self.patch_size, :]
67
+
68
+ # ---------------------------
69
+ # augmentation - flip, rotate
70
+ # ---------------------------
71
+ mode = np.random.randint(0, 8)
72
+ patch_H = util.augment_img(patch_H, mode=mode)
73
+
74
+ # ---------------------------
75
+ # 2) kernel
76
+ # ---------------------------
77
+ r_value = random.randint(0, 7)
78
+ if r_value>3:
79
+ k = utils_deblur.blurkernel_synthesis(h=25) # motion blur
80
+ else:
81
+ sf_k = random.choice(self.scales)
82
+ k = utils_sisr.gen_kernel(scale_factor=np.array([sf_k, sf_k])) # Gaussian blur
83
+ mode_k = random.randint(0, 7)
84
+ k = util.augment_img(k, mode=mode_k)
85
+
86
+ # ---------------------------
87
+ # 3) noise level
88
+ # ---------------------------
89
+ if random.randint(0, 8) == 1:
90
+ noise_level = 0/255.0
91
+ else:
92
+ noise_level = np.random.randint(0, self.sigma_max)/255.0
93
+
94
+ # ---------------------------
95
+ # Low-quality image
96
+ # ---------------------------
97
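+ # classical degradation: blur with kernel k (circular padding), s-fold downsample, then add Gaussian noise at the sampled level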
+ img_L = ndimage.filters.convolve(patch_H, np.expand_dims(k, axis=2), mode='wrap')
98
+ img_L = img_L[0::self.sf, 0::self.sf, ...]
99
+ # add Gaussian noise
100
+ img_L = util.uint2single(img_L) + np.random.normal(0, noise_level, img_L.shape)
101
+ img_H = patch_H
102
+
103
+ else:
104
+
105
+ k = self.kernels[0, 0].astype(np.float64) # validation kernel
106
+ k /= np.sum(k)
107
+ noise_level = 0./255.0 # validation noise level
108
+
109
+ # ------------------------------------
110
+ # modcrop
111
+ # ------------------------------------
112
+ img_H = util.modcrop(img_H, self.sf_validation)
113
+
114
+ img_L = ndimage.filters.convolve(img_H, np.expand_dims(k, axis=2), mode='wrap') # blur
115
+ img_L = img_L[0::self.sf_validation, 0::self.sf_validation, ...] # downsampling
116
+ img_L = util.uint2single(img_L) + np.random.normal(0, noise_level, img_L.shape)
117
+ self.sf = self.sf_validation
118
+
119
+ k = util.single2tensor3(np.expand_dims(np.float32(k), axis=2))
120
+ img_H, img_L = util.uint2tensor3(img_H), util.single2tensor3(img_L)
121
+ noise_level = torch.FloatTensor([noise_level]).view([1,1,1])
122
+
123
+ return {'L': img_L, 'H': img_H, 'k': k, 'sigma': noise_level, 'sf': self.sf, 'L_path': L_path, 'H_path': H_path}
124
+
125
+ def __len__(self):
126
+ return len(self.paths_H)
KAIR/data/dataset_video_test.py ADDED
@@ -0,0 +1,382 @@
1
+ import glob
2
+ import torch
3
+ from os import path as osp
4
+ import torch.utils.data as data
5
+
6
+ import utils.utils_video as utils_video
7
+
8
+
9
+ class VideoRecurrentTestDataset(data.Dataset):
10
+ """Video test dataset for recurrent architectures, which takes LR video
11
+ frames as input and outputs corresponding HR video frames. Modified from
12
+ https://github.com/xinntao/BasicSR/blob/master/basicsr/data/reds_dataset.py
13
+
14
+ Supported datasets: Vid4, REDS4, REDSofficial.
15
+ More generally, it supports testing datasets with the following structure:
16
+
17
+ dataroot
18
+ ├── subfolder1
19
+ ├── frame000
20
+ ├── frame001
21
+ ├── ...
22
+ ├── subfolder2
23
+ ├── frame000
24
+ ├── frame001
25
+ ├── ...
26
+ ├── ...
27
+
28
+ For testing datasets, there is no need to prepare LMDB files.
29
+
30
+ Args:
31
+ opt (dict): Config for train dataset. It contains the following keys:
32
+ dataroot_gt (str): Data root path for gt.
33
+ dataroot_lq (str): Data root path for lq.
34
+ io_backend (dict): IO backend type and other kwarg.
35
+ cache_data (bool): Whether to cache testing datasets.
36
+ name (str): Dataset name.
37
+ meta_info_file (str): The path to the file storing the list of test
38
+ folders. If not provided, all the folders in the dataroot will
39
+ be used.
40
+ num_frame (int): Window size for input frames.
41
+ padding (str): Padding mode.
42
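+
+ Example opt (illustrative values only, not verified defaults):
+ {'dataroot_gt': 'testsets/Vid4/GT', 'dataroot_lq': 'testsets/Vid4/BIx4',
+ 'cache_data': True, 'name': 'Vid4', 'num_frame': 7}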
+ """
43
+
44
+ def __init__(self, opt):
45
+ super(VideoRecurrentTestDataset, self).__init__()
46
+ self.opt = opt
47
+ self.cache_data = opt['cache_data']
48
+ self.gt_root, self.lq_root = opt['dataroot_gt'], opt['dataroot_lq']
49
+ self.data_info = {'lq_path': [], 'gt_path': [], 'folder': [], 'idx': [], 'border': []}
50
+
51
+ self.imgs_lq, self.imgs_gt = {}, {}
52
+ if 'meta_info_file' in opt:
53
+ with open(opt['meta_info_file'], 'r') as fin:
54
+ subfolders = [line.split(' ')[0] for line in fin]
55
+ subfolders_lq = [osp.join(self.lq_root, key) for key in subfolders]
56
+ subfolders_gt = [osp.join(self.gt_root, key) for key in subfolders]
57
+ else:
58
+ subfolders_lq = sorted(glob.glob(osp.join(self.lq_root, '*')))
59
+ subfolders_gt = sorted(glob.glob(osp.join(self.gt_root, '*')))
60
+
61
+ for subfolder_lq, subfolder_gt in zip(subfolders_lq, subfolders_gt):
62
+ # get frame list for lq and gt
63
+ subfolder_name = osp.basename(subfolder_lq)
64
+ img_paths_lq = sorted(list(utils_video.scandir(subfolder_lq, full_path=True)))
65
+ img_paths_gt = sorted(list(utils_video.scandir(subfolder_gt, full_path=True)))
66
+
67
+ max_idx = len(img_paths_lq)
68
+ assert max_idx == len(img_paths_gt), (f'Different number of images in lq ({max_idx})'
69
+ f' and gt folders ({len(img_paths_gt)})')
70
+
71
+ self.data_info['lq_path'].extend(img_paths_lq)
72
+ self.data_info['gt_path'].extend(img_paths_gt)
73
+ self.data_info['folder'].extend([subfolder_name] * max_idx)
74
+ for i in range(max_idx):
75
+ self.data_info['idx'].append(f'{i}/{max_idx}')
76
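+ # flag frames that lie within half the temporal window of either end of the clip as border frames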
+ border_l = [0] * max_idx
77
+ for i in range(self.opt['num_frame'] // 2):
78
+ border_l[i] = 1
79
+ border_l[max_idx - i - 1] = 1
80
+ self.data_info['border'].extend(border_l)
81
+
82
+ # cache data or save the frame list
83
+ if self.cache_data:
84
+ print(f'Cache {subfolder_name} for VideoTestDataset...')
85
+ self.imgs_lq[subfolder_name] = utils_video.read_img_seq(img_paths_lq)
86
+ self.imgs_gt[subfolder_name] = utils_video.read_img_seq(img_paths_gt)
87
+ else:
88
+ self.imgs_lq[subfolder_name] = img_paths_lq
89
+ self.imgs_gt[subfolder_name] = img_paths_gt
90
+
91
+ # Find unique folder strings
92
+ self.folders = sorted(list(set(self.data_info['folder'])))
93
+ self.sigma = opt['sigma'] / 255. if 'sigma' in opt else 0 # for non-blind video denoising
94
+
95
+ def __getitem__(self, index):
96
+ folder = self.folders[index]
97
+
98
+ if self.sigma:
99
+ # for non-blind video denoising
100
+ if self.cache_data:
101
+ imgs_gt = self.imgs_gt[folder]
102
+ else:
103
+ imgs_gt = utils_video.read_img_seq(self.imgs_gt[folder])
104
+
105
+ torch.manual_seed(0)
106
+ noise_level = torch.ones((1, 1, 1, 1)) * self.sigma
107
+ noise = torch.normal(mean=0, std=noise_level.expand_as(imgs_gt))
108
+ imgs_lq = imgs_gt + noise
109
+ t, _, h, w = imgs_lq.shape
110
+ imgs_lq = torch.cat([imgs_lq, noise_level.expand(t, 1, h, w)], 1)
111
+ else:
112
+ # for video sr and deblurring
113
+ if self.cache_data:
114
+ imgs_lq = self.imgs_lq[folder]
115
+ imgs_gt = self.imgs_gt[folder]
116
+ else:
117
+ imgs_lq = utils_video.read_img_seq(self.imgs_lq[folder])
118
+ imgs_gt = utils_video.read_img_seq(self.imgs_gt[folder])
119
+
120
+ return {
121
+ 'L': imgs_lq,
122
+ 'H': imgs_gt,
123
+ 'folder': folder,
124
+ 'lq_path': self.imgs_lq[folder],
125
+ }
126
+
127
+ def __len__(self):
128
+ return len(self.folders)
129
+
130
+
131
+ class SingleVideoRecurrentTestDataset(data.Dataset):
132
+ """Single video test dataset for recurrent architectures, which takes LR video
133
+ frames as input and outputs corresponding HR video frames (only the LQ path is required).
134
+
135
+ More generally, it supports testing datasets with the following structure:
136
+
137
+ dataroot
138
+ ├── subfolder1
139
+ ├── frame000
140
+ ├── frame001
141
+ ├── ...
142
+ ├── subfolder2
143
+ ├── frame000
144
+ ├── frame001
145
+ ├── ...
146
+ ├── ...
147
+
148
+ For testing datasets, there is no need to prepare LMDB files.
149
+
150
+ Args:
151
+ opt (dict): Config for train dataset. It contains the following keys:
152
+ dataroot_gt (str): Data root path for gt.
153
+ dataroot_lq (str): Data root path for lq.
154
+ io_backend (dict): IO backend type and other kwarg.
155
+ cache_data (bool): Whether to cache testing datasets.
156
+ name (str): Dataset name.
157
+ meta_info_file (str): The path to the file storing the list of test
158
+ folders. If not provided, all the folders in the dataroot will
159
+ be used.
160
+ num_frame (int): Window size for input frames.
161
+ padding (str): Padding mode.
162
+ """
163
+
164
+ def __init__(self, opt):
165
+ super(SingleVideoRecurrentTestDataset, self).__init__()
166
+ self.opt = opt
167
+ self.cache_data = opt['cache_data']
168
+ self.lq_root = opt['dataroot_lq']
169
+ self.data_info = {'lq_path': [], 'folder': [], 'idx': [], 'border': []}
170
+
171
+ self.imgs_lq = {}
172
+ if 'meta_info_file' in opt:
173
+ with open(opt['meta_info_file'], 'r') as fin:
174
+ subfolders = [line.split(' ')[0] for line in fin]
175
+ subfolders_lq = [osp.join(self.lq_root, key) for key in subfolders]
176
+ else:
177
+ subfolders_lq = sorted(glob.glob(osp.join(self.lq_root, '*')))
178
+
179
+ for subfolder_lq in subfolders_lq:
180
+ # get frame list for lq and gt
181
+ subfolder_name = osp.basename(subfolder_lq)
182
+ img_paths_lq = sorted(list(utils_video.scandir(subfolder_lq, full_path=True)))
183
+
184
+ max_idx = len(img_paths_lq)
185
+
186
+ self.data_info['lq_path'].extend(img_paths_lq)
187
+ self.data_info['folder'].extend([subfolder_name] * max_idx)
188
+ for i in range(max_idx):
189
+ self.data_info['idx'].append(f'{i}/{max_idx}')
190
+ border_l = [0] * max_idx
191
+ for i in range(self.opt['num_frame'] // 2):
192
+ border_l[i] = 1
193
+ border_l[max_idx - i - 1] = 1
194
+ self.data_info['border'].extend(border_l)
195
+
196
+ # cache data or save the frame list
197
+ if self.cache_data:
198
+ print(f'Cache {subfolder_name} for VideoTestDataset...')
199
+ self.imgs_lq[subfolder_name] = utils_video.read_img_seq(img_paths_lq)
200
+ else:
201
+ self.imgs_lq[subfolder_name] = img_paths_lq
202
+
203
+ # Find unique folder strings
204
+ self.folders = sorted(list(set(self.data_info['folder'])))
205
+
206
+ def __getitem__(self, index):
207
+ folder = self.folders[index]
208
+
209
+ if self.cache_data:
210
+ imgs_lq = self.imgs_lq[folder]
211
+ else:
212
+ imgs_lq = utils_video.read_img_seq(self.imgs_lq[folder])
213
+
214
+ return {
215
+ 'L': imgs_lq,
216
+ 'folder': folder,
217
+ 'lq_path': self.imgs_lq[folder],
218
+ }
219
+
220
+ def __len__(self):
221
+ return len(self.folders)
222
+
223
+
224
+ class VideoTestVimeo90KDataset(data.Dataset):
225
+ """Video test dataset for Vimeo90k-Test dataset.
226
+
227
+ It only keeps the center frame for testing.
228
+ For testing datasets, there is no need to prepare LMDB files.
229
+
230
+ Args:
231
+ opt (dict): Config for train dataset. It contains the following keys:
232
+ dataroot_gt (str): Data root path for gt.
233
+ dataroot_lq (str): Data root path for lq.
234
+ io_backend (dict): IO backend type and other kwarg.
235
+ cache_data (bool): Whether to cache testing datasets.
236
+ name (str): Dataset name.
237
+ meta_info_file (str): The path to the file storing the list of test
238
+ folders. If not provided, all the folders in the dataroot will
239
+ be used.
240
+ num_frame (int): Window size for input frames.
241
+ padding (str): Padding mode.
242
+ """
243
+
244
+ def __init__(self, opt):
245
+ super(VideoTestVimeo90KDataset, self).__init__()
246
+ self.opt = opt
247
+ self.cache_data = opt['cache_data']
248
+ if self.cache_data:
249
+ raise NotImplementedError('cache_data in Vimeo90K-Test dataset is not implemented.')
250
+ self.gt_root, self.lq_root = opt['dataroot_gt'], opt['dataroot_lq']
251
+ self.data_info = {'lq_path': [], 'gt_path': [], 'folder': [], 'idx': [], 'border': []}
252
+ neighbor_list = [i + (9 - opt['num_frame']) // 2 for i in range(opt['num_frame'])]
253
+
254
+ with open(opt['meta_info_file'], 'r') as fin:
255
+ subfolders = [line.split(' ')[0] for line in fin]
256
+ for idx, subfolder in enumerate(subfolders):
257
+ gt_path = osp.join(self.gt_root, subfolder, 'im4.png')
258
+ self.data_info['gt_path'].append(gt_path)
259
+ lq_paths = [osp.join(self.lq_root, subfolder, f'im{i}.png') for i in neighbor_list]
260
+ self.data_info['lq_path'].append(lq_paths)
261
+ self.data_info['folder'].append('vimeo90k')
262
+ self.data_info['idx'].append(f'{idx}/{len(subfolders)}')
263
+ self.data_info['border'].append(0)
264
+
265
+ self.pad_sequence = opt.get('pad_sequence', False)
266
+
267
+ def __getitem__(self, index):
268
+ lq_path = self.data_info['lq_path'][index]
269
+ gt_path = self.data_info['gt_path'][index]
270
+ imgs_lq = utils_video.read_img_seq(lq_path)
271
+ img_gt = utils_video.read_img_seq([gt_path])
272
+ img_gt.squeeze_(0)
273
+
274
+ if self.pad_sequence: # pad the sequence: 7 frames to 8 frames
275
+ imgs_lq = torch.cat([imgs_lq, imgs_lq[-1:,...]], dim=0)
276
+
277
+ return {
278
+ 'L': imgs_lq, # (t, c, h, w)
279
+ 'H': img_gt, # (c, h, w)
280
+ 'folder': self.data_info['folder'][index], # folder name
281
+ 'idx': self.data_info['idx'][index], # e.g., 0/843
282
+ 'border': self.data_info['border'][index], # 0 for non-border
283
+ 'lq_path': lq_path[self.opt['num_frame'] // 2] # center frame
284
+ }
285
+
286
+ def __len__(self):
287
+ return len(self.data_info['gt_path'])
288
+
289
+
290
+ class SingleVideoRecurrentTestDataset(data.Dataset):
291
+ """Single Video test dataset (only input LQ path).
292
+
293
+ Supported datasets: Vid4, REDS4, REDSofficial.
294
+ More generally, it supports testing datasets with the following structure:
295
+
296
+ dataroot
297
+ ├── subfolder1
298
+ ├── frame000
299
+ ├── frame001
300
+ ├── ...
301
+ ├── subfolder2
302
+ ├── frame000
303
+ ├── frame001
304
+ ├── ...
305
+ ├── ...
306
+
307
+ For testing datasets, there is no need to prepare LMDB files.
308
+
309
+ Args:
310
+ opt (dict): Config for train dataset. It contains the following keys:
311
+ dataroot_gt (str): Data root path for gt.
312
+ dataroot_lq (str): Data root path for lq.
313
+ io_backend (dict): IO backend type and other kwarg.
314
+ cache_data (bool): Whether to cache testing datasets.
315
+ name (str): Dataset name.
316
+ meta_info_file (str): The path to the file storing the list of test
317
+ folders. If not provided, all the folders in the dataroot will
318
+ be used.
319
+ num_frame (int): Window size for input frames.
320
+ padding (str): Padding mode.
321
+ """
322
+
323
+ def __init__(self, opt):
324
+ super(SingleVideoRecurrentTestDataset, self).__init__()
325
+ self.opt = opt
326
+ self.cache_data = opt['cache_data']
327
+ self.lq_root = opt['dataroot_lq']
328
+ self.data_info = {'lq_path': [], 'folder': [], 'idx': [], 'border': []}
329
+ # file client (io backend)
330
+ self.file_client = None
331
+
332
+ self.imgs_lq = {}
333
+ if 'meta_info_file' in opt:
334
+ with open(opt['meta_info_file'], 'r') as fin:
335
+ subfolders = [line.split(' ')[0] for line in fin]
336
+ subfolders_lq = [osp.join(self.lq_root, key) for key in subfolders]
337
+ else:
338
+ subfolders_lq = sorted(glob.glob(osp.join(self.lq_root, '*')))
339
+
340
+ for subfolder_lq in subfolders_lq:
341
+ # get frame list for lq and gt
342
+ subfolder_name = osp.basename(subfolder_lq)
343
+ img_paths_lq = sorted(list(utils_video.scandir(subfolder_lq, full_path=True)))
344
+
345
+ max_idx = len(img_paths_lq)
346
+
347
+ self.data_info['lq_path'].extend(img_paths_lq)
348
+ self.data_info['folder'].extend([subfolder_name] * max_idx)
349
+ for i in range(max_idx):
350
+ self.data_info['idx'].append(f'{i}/{max_idx}')
351
+ border_l = [0] * max_idx
352
+ for i in range(self.opt['num_frame'] // 2):
353
+ border_l[i] = 1
354
+ border_l[max_idx - i - 1] = 1
355
+ self.data_info['border'].extend(border_l)
356
+
357
+ # cache data or save the frame list
358
+ if self.cache_data:
359
+ print(f'Cache {subfolder_name} for VideoTestDataset...')
360
+ self.imgs_lq[subfolder_name] = utils_video.read_img_seq(img_paths_lq)
361
+ else:
362
+ self.imgs_lq[subfolder_name] = img_paths_lq
363
+
364
+ # Find unique folder strings
365
+ self.folders = sorted(list(set(self.data_info['folder'])))
366
+
367
+ def __getitem__(self, index):
368
+ folder = self.folders[index]
369
+
370
+ if self.cache_data:
371
+ imgs_lq = self.imgs_lq[folder]
372
+ else:
373
+ imgs_lq = utils_video.read_img_seq(self.imgs_lq[folder])
374
+
375
+ return {
376
+ 'L': imgs_lq,
377
+ 'folder': folder,
378
+ 'lq_path': self.imgs_lq[folder],
379
+ }
380
+
381
+ def __len__(self):
382
+ return len(self.folders)
KAIR/data/dataset_video_train.py ADDED
@@ -0,0 +1,390 @@
1
+ import numpy as np
2
+ import os
3
+ import random
4
+ import torch
5
+ from pathlib import Path
6
+ import torch.utils.data as data
7
+
8
+ import utils.utils_video as utils_video
9
+
10
+
11
+ class VideoRecurrentTrainDataset(data.Dataset):
12
+ """Video dataset for training recurrent networks.
13
+
14
+ The keys are generated from a meta info txt file.
15
+ basicsr/data/meta_info/meta_info_XXX_GT.txt
16
+
17
+ Each line contains:
18
+ 1. subfolder (clip) name; 2. frame number; 3. image shape, separated by
19
+ a white space.
20
+ Examples:
21
+ 720p_240fps_1 100 (720,1280,3)
22
+ 720p_240fps_3 100 (720,1280,3)
23
+ ...
24
+
25
+ Key examples: "720p_240fps_1/00000"
26
+ GT (gt): Ground-Truth;
27
+ LQ (lq): Low-Quality, e.g., low-resolution/blurry/noisy/compressed frames.
28
+
29
+ Args:
30
+ opt (dict): Config for train dataset. It contains the following keys:
31
+ dataroot_gt (str): Data root path for gt.
32
+ dataroot_lq (str): Data root path for lq.
33
+ dataroot_flow (str, optional): Data root path for flow.
34
+ meta_info_file (str): Path for meta information file.
35
+ val_partition (str): Validation partition types. 'REDS4' or
36
+ 'official'.
37
+ io_backend (dict): IO backend type and other kwarg.
38
+
39
+ num_frame (int): Window size for input frames.
40
+ gt_size (int): Cropped patched size for gt patches.
41
+ interval_list (list): Interval list for temporal augmentation.
42
+ random_reverse (bool): Random reverse input frames.
43
+ use_hflip (bool): Use horizontal flips.
44
+ use_rot (bool): Use rotation (use vertical flip and transposing h
45
+ and w for implementation).
46
+
47
+ scale (bool): Scale, which will be added automatically.
48
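+
+ Example opt (illustrative values only, not verified defaults):
+ {'name': 'REDS', 'dataroot_gt': 'trainsets/REDS/train_sharp',
+ 'dataroot_lq': 'trainsets/REDS/train_sharp_bicubic/X4',
+ 'meta_info_file': 'data/meta_info/meta_info_REDS_GT.txt',
+ 'val_partition': 'REDS4', 'test_mode': False,
+ 'io_backend': {'type': 'disk'}, 'num_frame': 6, 'gt_size': 256,
+ 'interval_list': [1], 'random_reverse': False,
+ 'use_hflip': True, 'use_rot': True, 'scale': 4}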
+ """
49
+
50
+ def __init__(self, opt):
51
+ super(VideoRecurrentTrainDataset, self).__init__()
52
+ self.opt = opt
53
+ self.scale = opt.get('scale', 4)
54
+ self.gt_size = opt.get('gt_size', 256)
55
+ self.gt_root, self.lq_root = Path(opt['dataroot_gt']), Path(opt['dataroot_lq'])
56
+ self.filename_tmpl = opt.get('filename_tmpl', '08d')
57
+ self.filename_ext = opt.get('filename_ext', 'png')
58
+ self.num_frame = opt['num_frame']
59
+
60
+ keys = []
61
+ total_num_frames = [] # some clips may not have 100 frames
62
+ start_frames = [] # some clips may not start from 00000
63
+ train_folders = os.listdir(self.lq_root)
64
+ print("TRAIN FOLDER: ", train_folders[0])
65
+ with open(opt['meta_info_file'], 'r') as fin:
66
+ for line in fin:
67
+ folder, frame_num, _, start_frame = line.split(' ')
68
+ if folder in train_folders:
69
+ keys.extend([f'{folder}/{i:{self.filename_tmpl}}' for i in range(int(start_frame), int(start_frame)+int(frame_num))])
70
+ total_num_frames.extend([int(frame_num) for i in range(int(frame_num))])
71
+ start_frames.extend([int(start_frame) for i in range(int(frame_num))])
72
+
73
+ # remove the video clips used in validation
74
+ if opt['name'] == 'REDS':
75
+ if opt['val_partition'] == 'REDS4':
76
+ val_partition = ['000', '011', '015', '020']
77
+ elif opt['val_partition'] == 'official':
78
+ val_partition = [f'{v:03d}' for v in range(240, 270)]
79
+ else:
80
+ raise ValueError(f'Wrong validation partition {opt["val_partition"]}.'
81
+ f"Supported ones are ['official', 'REDS4'].")
82
+ else:
83
+ val_partition = []
84
+
85
+ self.keys = []
86
+ self.total_num_frames = [] # some clips may not have 100 frames
87
+ self.start_frames = []
88
+ if opt['test_mode']:
89
+ for i, v in zip(range(len(keys)), keys):
90
+ if v.split('/')[0] in val_partition:
91
+ self.keys.append(keys[i])
92
+ self.total_num_frames.append(total_num_frames[i])
93
+ self.start_frames.append(start_frames[i])
94
+ else:
95
+ for i, v in zip(range(len(keys)), keys):
96
+ if v.split('/')[0] not in val_partition:
97
+ self.keys.append(keys[i])
98
+ self.total_num_frames.append(total_num_frames[i])
99
+ self.start_frames.append(start_frames[i])
100
+
101
+ # file client (io backend)
102
+ self.file_client = None
103
+ self.io_backend_opt = opt['io_backend']
104
+ self.is_lmdb = False
105
+ if self.io_backend_opt['type'] == 'lmdb':
106
+ self.is_lmdb = True
107
+ if hasattr(self, 'flow_root') and self.flow_root is not None:
108
+ self.io_backend_opt['db_paths'] = [self.lq_root, self.gt_root, self.flow_root]
109
+ self.io_backend_opt['client_keys'] = ['lq', 'gt', 'flow']
110
+ else:
111
+ self.io_backend_opt['db_paths'] = [self.lq_root, self.gt_root]
112
+ self.io_backend_opt['client_keys'] = ['lq', 'gt']
113
+
114
+ # temporal augmentation configs
115
+ self.interval_list = opt.get('interval_list', [1])
116
+ self.random_reverse = opt.get('random_reverse', False)
117
+ interval_str = ','.join(str(x) for x in self.interval_list)
118
+ print(f'Temporal augmentation interval list: [{interval_str}]; '
119
+ f'random reverse is {self.random_reverse}.')
120
+
121
+ def __getitem__(self, index):
122
+ if self.file_client is None:
123
+ self.file_client = utils_video.FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)
124
+
125
+ key = self.keys[index]
126
+ total_num_frames = self.total_num_frames[index]
127
+ start_frames = self.start_frames[index]
128
+ clip_name, frame_name = key.split('/') # key example: 000/00000000
129
+
130
+ # determine the neighboring frames
131
+ interval = random.choice(self.interval_list)
132
+
133
+ # ensure not exceeding the borders
134
+ start_frame_idx = int(frame_name)
135
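+ # latest valid start index that still leaves num_frame frames (at this temporal interval) inside the clip; resample the start if the key begins too late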
+ endmost_start_frame_idx = start_frames + total_num_frames - self.num_frame * interval
136
+ if start_frame_idx > endmost_start_frame_idx:
137
+ start_frame_idx = random.randint(start_frames, endmost_start_frame_idx)
138
+ end_frame_idx = start_frame_idx + self.num_frame * interval
139
+
140
+ neighbor_list = list(range(start_frame_idx, end_frame_idx, interval))
141
+
142
+ # random reverse
143
+ if self.random_reverse and random.random() < 0.5:
144
+ neighbor_list.reverse()
145
+
146
+ # get the neighboring LQ and GT frames
147
+ img_lqs = []
148
+ img_gts = []
149
+ for neighbor in neighbor_list:
150
+ if self.is_lmdb:
151
+ img_lq_path = f'{clip_name}/{neighbor:{self.filename_tmpl}}'
152
+ img_gt_path = f'{clip_name}/{neighbor:{self.filename_tmpl}}'
153
+ else:
154
+ img_lq_path = self.lq_root / clip_name / f'{neighbor:{self.filename_tmpl}}.{self.filename_ext}'
155
+ img_gt_path = self.gt_root / clip_name / f'{neighbor:{self.filename_tmpl}}.{self.filename_ext}'
156
+
157
+ # get LQ
158
+ img_bytes = self.file_client.get(img_lq_path, 'lq')
159
+ img_lq = utils_video.imfrombytes(img_bytes, float32=True)
160
+ img_lqs.append(img_lq)
161
+
162
+ # get GT
163
+ img_bytes = self.file_client.get(img_gt_path, 'gt')
164
+ img_gt = utils_video.imfrombytes(img_bytes, float32=True)
165
+ img_gts.append(img_gt)
166
+
167
+ # randomly crop
168
+ img_gts, img_lqs = utils_video.paired_random_crop(img_gts, img_lqs, self.gt_size, self.scale, img_gt_path)
169
+
170
+ # augmentation - flip, rotate
171
+ img_lqs.extend(img_gts)
172
+ img_results = utils_video.augment(img_lqs, self.opt['use_hflip'], self.opt['use_rot'])
173
+
174
+ img_results = utils_video.img2tensor(img_results)
175
+ img_gts = torch.stack(img_results[len(img_lqs) // 2:], dim=0)
176
+ img_lqs = torch.stack(img_results[:len(img_lqs) // 2], dim=0)
177
+
178
+ # img_lqs: (t, c, h, w)
179
+ # img_gts: (t, c, h, w)
180
+ # key: str
181
+ return {'L': img_lqs, 'H': img_gts, 'key': key}
182
+
183
+ def __len__(self):
184
+ return len(self.keys)
185
+
186
+
187
+ class VideoRecurrentTrainNonblindDenoisingDataset(VideoRecurrentTrainDataset):
188
+ """Video dataset for training recurrent architectures in non-blind video denoising.
189
+
190
+ Args:
191
+ Same as VideoTestDataset.
192
+
193
+ """
194
+
195
+ def __init__(self, opt):
196
+ super(VideoRecurrentTrainNonblindDenoisingDataset, self).__init__(opt)
197
+ self.sigma_min = self.opt['sigma_min'] / 255.
198
+ self.sigma_max = self.opt['sigma_max'] / 255.
199
+
200
+ def __getitem__(self, index):
201
+ if self.file_client is None:
202
+ self.file_client = utils_video.FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)
203
+
204
+ key = self.keys[index]
205
+ total_num_frames = self.total_num_frames[index]
206
+ start_frames = self.start_frames[index]
207
+ clip_name, frame_name = key.split('/') # key example: 000/00000000
208
+
209
+ # determine the neighboring frames
210
+ interval = random.choice(self.interval_list)
211
+
212
+ # ensure not exceeding the borders
213
+ start_frame_idx = int(frame_name)
214
+ endmost_start_frame_idx = start_frames + total_num_frames - self.num_frame * interval
215
+ if start_frame_idx > endmost_start_frame_idx:
216
+ start_frame_idx = random.randint(start_frames, endmost_start_frame_idx)
217
+ end_frame_idx = start_frame_idx + self.num_frame * interval
218
+
219
+ neighbor_list = list(range(start_frame_idx, end_frame_idx, interval))
220
+
221
+ # random reverse
222
+ if self.random_reverse and random.random() < 0.5:
223
+ neighbor_list.reverse()
224
+
225
+ # get the neighboring GT frames
226
+ img_gts = []
227
+ for neighbor in neighbor_list:
228
+ if self.is_lmdb:
229
+ img_gt_path = f'{clip_name}/{neighbor:{self.filename_tmpl}}'
230
+ else:
231
+ img_gt_path = self.gt_root / clip_name / f'{neighbor:{self.filename_tmpl}}.{self.filename_ext}'
232
+
233
+ # get GT
234
+ img_bytes = self.file_client.get(img_gt_path, 'gt')
235
+ img_gt = utils_video.imfrombytes(img_bytes, float32=True)
236
+ img_gts.append(img_gt)
237
+
238
+ # randomly crop
239
+ img_gts, _ = utils_video.paired_random_crop(img_gts, img_gts, self.gt_size, 1, img_gt_path)
240
+
241
+ # augmentation - flip, rotate
242
+ img_gts = utils_video.augment(img_gts, self.opt['use_hflip'], self.opt['use_rot'])
243
+
244
+ img_gts = utils_video.img2tensor(img_gts)
245
+ img_gts = torch.stack(img_gts, dim=0)
246
+
247
+ # we add noise in the network
248
+ noise_level = torch.empty((1, 1, 1, 1)).uniform_(self.sigma_min, self.sigma_max)
249
+ noise = torch.normal(mean=0, std=noise_level.expand_as(img_gts))
250
+ img_lqs = img_gts + noise
251
+
252
+ t, _, h, w = img_lqs.shape
253
+ img_lqs = torch.cat([img_lqs, noise_level.expand(t, 1, h, w)], 1)
254
+
255
+ # img_lqs: (t, c, h, w)
256
+ # img_gts: (t, c, h, w)
257
+ # key: str
258
+ return {'L': img_lqs, 'H': img_gts, 'key': key}
259
+
260
+
261
+ def __len__(self):
262
+ return len(self.keys)
263
+
264
+
265
+ class VideoRecurrentTrainVimeoDataset(data.Dataset):
266
+ """Vimeo90K dataset for training recurrent networks.
267
+
268
+ The keys are generated from a meta info txt file.
269
+ basicsr/data/meta_info/meta_info_Vimeo90K_train_GT.txt
270
+
271
+ Each line contains:
272
+ 1. clip name; 2. frame number; 3. image shape, separated by a white space.
273
+ Examples:
274
+ 00001/0001 7 (256,448,3)
275
+ 00001/0002 7 (256,448,3)
276
+
277
+ Key examples: "00001/0001"
278
+ GT (gt): Ground-Truth;
279
+ LQ (lq): Low-Quality, e.g., low-resolution/blurry/noisy/compressed frames.
280
+
281
+ The neighboring frame list for different num_frame:
282
+ num_frame | frame list
283
+ 1 | 4
284
+ 3 | 3,4,5
285
+ 5 | 2,3,4,5,6
286
+ 7 | 1,2,3,4,5,6,7
287
+
288
+ Args:
289
+ opt (dict): Config for train dataset. It contains the following keys:
290
+ dataroot_gt (str): Data root path for gt.
291
+ dataroot_lq (str): Data root path for lq.
292
+ meta_info_file (str): Path for meta information file.
293
+ io_backend (dict): IO backend type and other kwarg.
294
+
295
+ num_frame (int): Window size for input frames.
296
+ gt_size (int): Cropped patched size for gt patches.
297
+ random_reverse (bool): Random reverse input frames.
298
+ use_hflip (bool): Use horizontal flips.
299
+ use_rot (bool): Use rotation (use vertical flip and transposing h
300
+ and w for implementation).
301
+
302
+ scale (bool): Scale, which will be added automatically.
303
+ """
304
+
305
+ def __init__(self, opt):
306
+ super(VideoRecurrentTrainVimeoDataset, self).__init__()
307
+ self.opt = opt
308
+ self.gt_root, self.lq_root = Path(opt['dataroot_gt']), Path(opt['dataroot_lq'])
309
+
310
+ with open(opt['meta_info_file'], 'r') as fin:
311
+ self.keys = [line.split(' ')[0] for line in fin]
312
+
313
+ # file client (io backend)
314
+ self.file_client = None
315
+ self.io_backend_opt = opt['io_backend']
316
+ self.is_lmdb = False
317
+ if self.io_backend_opt['type'] == 'lmdb':
318
+ self.is_lmdb = True
319
+ self.io_backend_opt['db_paths'] = [self.lq_root, self.gt_root]
320
+ self.io_backend_opt['client_keys'] = ['lq', 'gt']
321
+
322
+ # indices of input images
323
+ self.neighbor_list = [i + (9 - opt['num_frame']) // 2 for i in range(opt['num_frame'])]
324
+
325
+ # temporal augmentation configs
326
+ self.random_reverse = opt['random_reverse']
327
+ print(f'Random reverse is {self.random_reverse}.')
328
+
329
+ self.flip_sequence = opt.get('flip_sequence', False)
330
+ self.pad_sequence = opt.get('pad_sequence', False)
331
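+ # note: this hard-coded list overrides the num_frame-based neighbor_list above, so all 7 Vimeo frames are always loaded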
+ self.neighbor_list = [1, 2, 3, 4, 5, 6, 7]
332
+
333
+ def __getitem__(self, index):
334
+ if self.file_client is None:
335
+ self.file_client = utils_video.FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)
336
+
337
+ # random reverse
338
+ if self.random_reverse and random.random() < 0.5:
339
+ self.neighbor_list.reverse()
340
+
341
+ scale = self.opt['scale']
342
+ gt_size = self.opt['gt_size']
343
+ key = self.keys[index]
344
+ clip, seq = key.split('/') # key example: 00001/0001
345
+
346
+ # get the neighboring LQ and GT frames
347
+ img_lqs = []
348
+ img_gts = []
349
+ for neighbor in self.neighbor_list:
350
+ if self.is_lmdb:
351
+ img_lq_path = f'{clip}/{seq}/im{neighbor}'
352
+ img_gt_path = f'{clip}/{seq}/im{neighbor}'
353
+ else:
354
+ img_lq_path = self.lq_root / clip / seq / f'im{neighbor}.png'
355
+ img_gt_path = self.gt_root / clip / seq / f'im{neighbor}.png'
356
+ # LQ
357
+ img_bytes = self.file_client.get(img_lq_path, 'lq')
358
+ img_lq = utils_video.imfrombytes(img_bytes, float32=True)
359
+ # GT
360
+ img_bytes = self.file_client.get(img_gt_path, 'gt')
361
+ img_gt = utils_video.imfrombytes(img_bytes, float32=True)
362
+
363
+ img_lqs.append(img_lq)
364
+ img_gts.append(img_gt)
365
+
366
+ # randomly crop
367
+ img_gts, img_lqs = utils_video.paired_random_crop(img_gts, img_lqs, gt_size, scale, img_gt_path)
368
+
369
+ # augmentation - flip, rotate
370
+ img_lqs.extend(img_gts)
371
+ img_results = utils_video.augment(img_lqs, self.opt['use_hflip'], self.opt['use_rot'])
372
+
373
+ img_results = utils_video.img2tensor(img_results)
374
+ img_lqs = torch.stack(img_results[:7], dim=0)
375
+ img_gts = torch.stack(img_results[7:], dim=0)
376
+
377
+ if self.flip_sequence: # flip the sequence: 7 frames to 14 frames
378
+ img_lqs = torch.cat([img_lqs, img_lqs.flip(0)], dim=0)
379
+ img_gts = torch.cat([img_gts, img_gts.flip(0)], dim=0)
380
+ elif self.pad_sequence: # pad the sequence: 7 frames to 8 frames
381
+ img_lqs = torch.cat([img_lqs, img_lqs[-1:,...]], dim=0)
382
+ img_gts = torch.cat([img_gts, img_gts[-1:,...]], dim=0)
383
+
384
+ # img_lqs: (t, c, h, w)
385
+ # img_gts: (t, c, h, w)
386
+ # key: str
387
+ return {'L': img_lqs, 'H': img_gts, 'key': key}
388
+
389
+ def __len__(self):
390
+ return len(self.keys)
KAIR/data/degradations.py ADDED
@@ -0,0 +1,145 @@
1
+ from typing import Tuple
2
+
3
+ import numpy as np
4
+ import random
5
+ import torch
6
+ from numpy.typing import NDArray
7
+
8
+ from basicsr.data.degradations import random_add_gaussian_noise_pt, random_add_poisson_noise_pt
9
+ from basicsr.data.transforms import paired_random_crop
10
+ from basicsr.utils import DiffJPEG, USMSharp
11
+ from basicsr.utils.img_process_util import filter2D
12
+ from torch import Tensor
13
+ from torch.nn import functional as F
14
+
15
+
16
+ def blur(img: Tensor, kernel: NDArray) -> Tensor:
17
+ return filter2D(img, kernel)
18
+
19
+
20
+ def random_resize(
21
+ img: Tensor,
22
+ resize_prob: Tuple[float, float, float],
23
+ resize_range: Tuple[int, int],
24
+ output_scale: float = 1
25
+ ) -> Tensor:
26
+ updown_type = random.choices(['up', 'down', 'keep'], resize_prob)[0]
27
+ if updown_type == 'up':
28
+ random_scale = np.random.uniform(1, resize_range[1])
29
+ elif updown_type == 'down':
30
+ random_scale = np.random.uniform(resize_range[0], 1)
31
+ else:
32
+ random_scale = 1
33
+ mode = random.choice(['area', 'bilinear', 'bicubic'])
34
+ out = F.interpolate(img, scale_factor=output_scale * random_scale, mode=mode)
35
+ return out
36
+
37
+
38
+ def add_noise(
39
+ img: Tensor,
40
+ gray_noise_prob: float,
41
+ gaussian_noise_prob: float,
42
+ noise_range: Tuple[float, float],
43
+ poisson_scale_range: Tuple[float, float]
44
+ ) -> Tensor:
45
+ if np.random.uniform() < gaussian_noise_prob:
46
+ img = random_add_gaussian_noise_pt(
47
+ img, sigma_range=noise_range, clip=True, rounds=False,
48
+ gray_prob=gray_noise_prob)
49
+ else:
50
+ img = random_add_poisson_noise_pt(
51
+ img, scale_range=poisson_scale_range,
52
+ gray_prob=gray_noise_prob, clip=True, rounds=False)
53
+ return img
54
+
55
+
56
+ def jpeg_compression_simulation(
57
+ img: Tensor,
58
+ jpeg_range: Tuple[float, float],
59
+ jpeg_simulator: DiffJPEG
60
+ ) -> Tensor:
61
+ jpeg_p = img.new_zeros(img.size(0)).uniform_(*jpeg_range)
62
+
63
+ # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
64
+ img = torch.clamp(img, 0, 1)
65
+ return jpeg_simulator(img, quality=jpeg_p)
66
+
67
+
68
+ @torch.no_grad()
69
+ def apply_real_esrgan_degradations(
70
+ gt: Tensor,
71
+ blur_kernel1: NDArray,
72
+ blur_kernel2: NDArray,
73
+ second_blur_prob: float,
74
+ sinc_kernel: NDArray,
75
+ resize_prob1: Tuple[float, float, float],
76
+ resize_prob2: Tuple[float, float, float],
77
+ resize_range1: Tuple[int, int],
78
+ resize_range2: Tuple[int, int],
79
+ gray_noise_prob1: float,
80
+ gray_noise_prob2: float,
81
+ gaussian_noise_prob1: float,
82
+ gaussian_noise_prob2: float,
83
+ noise_range: Tuple[float, float],
84
+ poisson_scale_range: Tuple[float, float],
85
+ jpeg_compression_range1: Tuple[float, float],
86
+ jpeg_compression_range2: Tuple[float, float],
87
+ jpeg_simulator: DiffJPEG,
88
+ random_crop_gt_size: int,
89
+ sr_upsample_scale: int,
90
+ usm_sharpener: USMSharp
91
+ ):
92
+ """
93
+ Accept a batch from the dataloader, then apply two-stage (second-order) degradations
94
+ to obtain LQ images.
95
+
96
+ gt: Tensor of shape (B x C x H x W)
97
+ """
98
+ gt_usm = usm_sharpener(gt)
99
+ # from PIL import Image
100
+ # Image.fromarray((gt_usm[0].permute(1, 2, 0).cpu().numpy() * 255.).astype(np.uint8)).save(
101
+ # "/home/cll/Desktop/GT_USM_orig.png")
102
+ orig_h, orig_w = gt.size()[2:4]
103
+
104
+ # ----------------------- The first degradation process ----------------------- #
105
+ out = blur(gt_usm, blur_kernel1)
106
+ out = random_resize(out, resize_prob1, resize_range1)
107
+ out = add_noise(out, gray_noise_prob1, gaussian_noise_prob1, noise_range, poisson_scale_range)
108
+ out = jpeg_compression_simulation(out, jpeg_compression_range1, jpeg_simulator)
109
+
110
+ # ----------------------- The second degradation process ----------------------- #
111
+ if np.random.uniform() < second_blur_prob:
112
+ out = blur(out, blur_kernel2)
113
+ out = random_resize(out, resize_prob2, resize_range2, output_scale=(1/sr_upsample_scale))
114
+ out = add_noise(out, gray_noise_prob2, gaussian_noise_prob2,
115
+ noise_range, poisson_scale_range)
116
+
117
+ # JPEG compression + the final sinc filter
118
+ # We also need to resize images to desired sizes.
119
+ # We group [resize back + sinc filter] together
120
+ # as one operation.
121
+ # We consider two orders:
122
+ # 1. [resize back + sinc filter] + JPEG compression
123
+ # 2. JPEG compression + [resize back + sinc filter]
124
+ # Empirically, we find other combinations (sinc + JPEG + Resize)
125
+ # will introduce twisted lines.
126
+ if np.random.uniform() < 0.5:
127
+ # resize back + the final sinc filter
128
+ mode = random.choice(['area', 'bilinear', 'bicubic'])
129
+ out = F.interpolate(out, size=(orig_h // sr_upsample_scale,
130
+ orig_w // sr_upsample_scale), mode=mode)
131
+ out = blur(out, sinc_kernel)
132
+ out = jpeg_compression_simulation(out, jpeg_compression_range2, jpeg_simulator)
133
+ else:
134
+ out = jpeg_compression_simulation(out, jpeg_compression_range2, jpeg_simulator)
135
+ mode = random.choice(['area', 'bilinear', 'bicubic'])
136
+ out = F.interpolate(out, size=(orig_h // sr_upsample_scale,
137
+ orig_w // sr_upsample_scale), mode=mode)
138
+ out = blur(out, sinc_kernel)
139
+
140
+ # clamp and round
141
+ lq = torch.clamp((out * 255.0).round(), 0, 255) / 255.
142
+
143
+ (gt, gt_usm), lq = paired_random_crop([gt, gt_usm], lq, random_crop_gt_size, sr_upsample_scale)
144
+
145
+ return gt, gt_usm, lq
KAIR/data/select_dataset.py ADDED
@@ -0,0 +1,86 @@
1
+
2
+
3
+ '''
4
+ # --------------------------------------------
5
+ # select dataset
6
+ # --------------------------------------------
7
+ # Kai Zhang (github: https://github.com/cszn)
8
+ # --------------------------------------------
9
+ '''
10
+
11
+
12
+ def define_Dataset(dataset_opt):
13
+ dataset_type = dataset_opt['dataset_type'].lower()
14
+ if dataset_type in ['l', 'low-quality', 'input-only']:
15
+ from data.dataset_l import DatasetL as D
16
+
17
+ # -----------------------------------------
18
+ # denoising
19
+ # -----------------------------------------
20
+ elif dataset_type in ['dncnn', 'denoising']:
21
+ from data.dataset_dncnn import DatasetDnCNN as D
22
+
23
+ elif dataset_type in ['dnpatch']:
24
+ from data.dataset_dnpatch import DatasetDnPatch as D
25
+
26
+ elif dataset_type in ['ffdnet', 'denoising-noiselevel']:
27
+ from data.dataset_ffdnet import DatasetFFDNet as D
28
+
29
+ elif dataset_type in ['fdncnn', 'denoising-noiselevelmap']:
30
+ from data.dataset_fdncnn import DatasetFDnCNN as D
31
+
32
+ # -----------------------------------------
33
+ # super-resolution
34
+ # -----------------------------------------
35
+ elif dataset_type in ['sr', 'super-resolution']:
36
+ from data.dataset_sr import DatasetSR as D
37
+
38
+ elif dataset_type in ['srmd']:
39
+ from data.dataset_srmd import DatasetSRMD as D
40
+
41
+ elif dataset_type in ['dpsr', 'dnsr']:
42
+ from data.dataset_dpsr import DatasetDPSR as D
43
+
44
+ elif dataset_type in ['usrnet', 'usrgan']:
45
+ from data.dataset_usrnet import DatasetUSRNet as D
46
+
47
+ elif dataset_type in ['bsrnet', 'bsrgan', 'blindsr']:
48
+ from data.dataset_blindsr import DatasetBlindSR as D
49
+
50
+ # -------------------------------------------------
51
+ # JPEG compression artifact reduction (deblocking)
52
+ # -------------------------------------------------
53
+ elif dataset_type in ['jpeg']:
54
+ from data.dataset_jpeg import DatasetJPEG as D
55
+
56
+ # -----------------------------------------
57
+ # video restoration
58
+ # -----------------------------------------
59
+ elif dataset_type in ['videorecurrenttraindataset']:
60
+ from data.dataset_video_train import VideoRecurrentTrainDataset as D
61
+ elif dataset_type in ['videorecurrenttrainnonblinddenoisingdataset']:
62
+ from data.dataset_video_train import VideoRecurrentTrainNonblindDenoisingDataset as D
63
+ elif dataset_type in ['videorecurrenttrainvimeodataset']:
64
+ from data.dataset_video_train import VideoRecurrentTrainVimeoDataset as D
65
+ elif dataset_type in ['videorecurrenttestdataset']:
66
+ from data.dataset_video_test import VideoRecurrentTestDataset as D
67
+ elif dataset_type in ['singlevideorecurrenttestdataset']:
68
+ from data.dataset_video_test import SingleVideoRecurrentTestDataset as D
69
+ elif dataset_type in ['videotestvimeo90kdataset']:
70
+ from data.dataset_video_test import VideoTestVimeo90KDataset as D
71
+
72
+ # -----------------------------------------
73
+ # common
74
+ # -----------------------------------------
75
+ elif dataset_type in ['plain']:
76
+ from data.dataset_plain import DatasetPlain as D
77
+
78
+ elif dataset_type in ['plainpatch']:
79
+ from data.dataset_plainpatch import DatasetPlainPatch as D
80
+
81
+ else:
82
+ raise NotImplementedError('Dataset [{:s}] is not found.'.format(dataset_type))
83
+
84
+ dataset = D(dataset_opt)
85
+ print('Dataset [{:s} - {:s}] is created.'.format(dataset.__class__.__name__, dataset_opt['name']))
86
+ return dataset
KAIR/docs/README_SwinIR.md ADDED
@@ -0,0 +1,194 @@
1
+ # SwinIR: Image Restoration Using Swin Transformer
2
+ [paper](https://arxiv.org/abs/2108.10257)
3
+ **|**
4
+ [supplementary](https://github.com/JingyunLiang/SwinIR/releases/tag/v0.0)
5
+ **|**
6
+ [visual results](https://github.com/JingyunLiang/SwinIR/releases/tag/v0.0)
7
+ **|**
8
+ [original project page](https://github.com/JingyunLiang/SwinIR)
9
+ **|**
10
+ [online Colab demo](https://colab.research.google.com/gist/JingyunLiang/a5e3e54bc9ef8d7bf594f6fee8208533/swinir-demo-on-real-world-image-sr.ipynb)
11
+
12
+ [![arXiv](https://img.shields.io/badge/arXiv-Paper-<COLOR>.svg)](https://arxiv.org/abs/2108.10257)
13
+ [![GitHub Stars](https://img.shields.io/github/stars/JingyunLiang/SwinIR?style=social)](https://github.com/JingyunLiang/SwinIR)
14
+ [![download](https://img.shields.io/github/downloads/JingyunLiang/SwinIR/total.svg)](https://github.com/JingyunLiang/SwinIR/releases)
15
+ [ <a href="https://colab.research.google.com/gist/JingyunLiang/a5e3e54bc9ef8d7bf594f6fee8208533/swinir-demo-on-real-world-image-sr.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="google colab logo"></a>](https://colab.research.google.com/gist/JingyunLiang/a5e3e54bc9ef8d7bf594f6fee8208533/swinir-demo-on-real-world-image-sr.ipynb)
16
+
17
+ > Image restoration is a long-standing low-level vision problem that aims to restore high-quality images from low-quality images (e.g., downscaled, noisy and compressed images). While state-of-the-art image restoration methods are based on convolutional neural networks, few attempts have been made with Transformers which show impressive performance on high-level vision tasks. In this paper, we propose a strong baseline model SwinIR for image restoration based on the Swin Transformer. SwinIR consists of three parts: shallow feature extraction, deep feature extraction and high-quality image reconstruction. In particular, the deep feature extraction module is composed of several residual Swin Transformer blocks (RSTB), each of which has several Swin Transformer layers together with a residual connection. We conduct experiments on three representative tasks: image super-resolution (including classical, lightweight and real-world image super-resolution), image denoising (including grayscale and color image denoising) and JPEG compression artifact reduction. Experimental results demonstrate that SwinIR outperforms state-of-the-art methods on different tasks by up to 0.14~0.45dB, while the total number of parameters can be reduced by up to 67%.
18
+
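+ As a rough illustration of the RSTB structure mentioned in the abstract, here is a minimal, hypothetical PyTorch sketch of the residual design (several layers plus a convolution inside a residual connection); the `layer_factory`, toy dimensions and placeholder convolution below are assumptions for illustration only, not the layers or settings used by SwinIR.
+ 
+ ```python
+ import torch
+ import torch.nn as nn
+ 
+ class RSTB(nn.Module):
+     """Sketch of a residual Swin Transformer block: a stack of layers plus a
+     convolution, wrapped in a residual connection."""
+     def __init__(self, dim, depth, layer_factory):
+         super().__init__()
+         self.body = nn.Sequential(*[layer_factory(dim) for _ in range(depth)])
+         self.conv = nn.Conv2d(dim, dim, kernel_size=3, padding=1)
+ 
+     def forward(self, x):  # x: (B, C, H, W) feature map
+         return x + self.conv(self.body(x))
+ 
+ # Toy usage: a plain convolution stands in for a real Swin Transformer layer.
+ block = RSTB(dim=60, depth=6, layer_factory=lambda d: nn.Conv2d(d, d, 3, padding=1))
+ out = block(torch.randn(1, 60, 64, 64))  # -> shape (1, 60, 64, 64)
+ ```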
19
+
20
+ ### Dataset Preparation
21
+
22
+ Training and testing sets can be downloaded as follows. Please put them in `trainsets` and `testsets` respectively.
23
+
24
+ | Task | Training Set | Testing Set|
25
+ | :--- | :---: | :---: |
26
+ | classical/lightweight image SR | [DIV2K](https://cv.snu.ac.kr/research/EDSR/DIV2K.tar) (800 training images) or DIV2K +[Flickr2K](https://cv.snu.ac.kr/research/EDSR/Flickr2K.tar) (2650 images) | set5 + Set14 + BSD100 + Urban100 + Manga109 [download all](https://drive.google.com/drive/folders/1B3DJGQKB6eNdwuQIhdskA64qUuVKLZ9u) |
27
+ | real-world image SR | SwinIR-M (middle size): [DIV2K](https://cv.snu.ac.kr/research/EDSR/DIV2K.tar) (800 training images) +[Flickr2K](https://cv.snu.ac.kr/research/EDSR/Flickr2K.tar) (2650 images) + [OST](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/datasets/OST_dataset.zip) (10324 images, sky,water,grass,mountain,building,plant,animal) <br /> SwinIR-L (large size): DIV2K + Flickr2K + OST + [WED](http://ivc.uwaterloo.ca/database/WaterlooExploration/exploration_database_and_code.rar)(4744 images) + [FFHQ](https://drive.google.com/drive/folders/1tZUcXDBeOibC6jcMCtgRRz67pzrAHeHL) (first 2000 images, face) + Manga109 (manga) + [SCUT-CTW1500](https://universityofadelaide.box.com/shared/static/py5uwlfyyytbb2pxzq9czvu6fuqbjdh8.zip) (first 100 training images, texts) <br /><br /> ***We use the first practical degradation model [BSRGAN, ICCV2021 ![GitHub Stars](https://img.shields.io/github/stars/cszn/BSRGAN?style=social)](https://github.com/cszn/BSRGAN) for real-world image SR** | [RealSRSet+5images](https://github.com/JingyunLiang/SwinIR/releases/download/v0.0/RealSRSet+5images.zip) |
28
+ | color/grayscale image denoising | [DIV2K](https://cv.snu.ac.kr/research/EDSR/DIV2K.tar) (800 training images) + [Flickr2K](https://cv.snu.ac.kr/research/EDSR/Flickr2K.tar) (2650 images) + [BSD500](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/BSR/BSR_bsds500.tgz) (400 training&testing images) + [WED](http://ivc.uwaterloo.ca/database/WaterlooExploration/exploration_database_and_code.rar)(4744 images) | grayscale: Set12 + BSD68 + Urban100 <br /> color: CBSD68 + Kodak24 + McMaster + Urban100 [download all](https://github.com/cszn/FFDNet/tree/master/testsets) |
29
+ | JPEG compression artifact reduction | [DIV2K](https://cv.snu.ac.kr/research/EDSR/DIV2K.tar) (800 training images) + [Flickr2K](https://cv.snu.ac.kr/research/EDSR/Flickr2K.tar) (2650 images) + [BSD500](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/BSR/BSR_bsds500.tgz) (400 training&testing images) + [WED](http://ivc.uwaterloo.ca/database/WaterlooExploration/exploration_database_and_code.rar)(4744 images) | grayscale: Classic5 +LIVE1 [download all](https://github.com/cszn/DnCNN/tree/master/testsets) |
30
+
31
+
32
+ ### Training
33
+ To train SwinIR, run the following commands. You may need to change the `dataroot_H`, `dataroot_L`, `scale factor`, `noise level`, `JPEG level`, `G_optimizer_lr`, `G_scheduler_milestones`, etc. in the json file for different settings.
34
+
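+ For orientation, the fragment below shows the kind of fields you would typically adjust; the paths and values are placeholders for illustration (the option files under `options/swinir/` define the actual defaults), and exact key names may differ between tasks.
+ 
+ ```json
+ {
+   "scale": 4,
+   "datasets": {
+     "train": {
+       "dataroot_H": "trainsets/trainH",
+       "dataroot_L": "trainsets/trainL"
+     }
+   },
+   "train": {
+     "G_optimizer_lr": 2e-4,
+     "G_scheduler_milestones": [250000, 400000, 450000, 475000, 500000]
+   }
+ }
+ ```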
35
+
36
+
37
+ ```python
38
+ # 001 Classical Image SR (middle size)
39
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_psnr.py --opt options/swinir/train_swinir_sr_classical.json --dist True
40
+
41
+ # 002 Lightweight Image SR (small size)
42
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_psnr.py --opt options/swinir/train_swinir_sr_lightweight.json --dist True
43
+
44
+ # 003 Real-World Image SR (middle size)
45
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_psnr.py --opt options/swinir/train_swinir_sr_realworld_psnr.json --dist True
46
+ # before training gan, put the PSNR-oriented model into superresolution/swinir_sr_realworld_x4_gan/models/
47
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_psnr.py --opt options/swinir/train_swinir_sr_realworld_gan.json --dist True
48
+
49
+ # 004 Grayscale Image Denoising (middle size)
50
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_psnr.py --opt options/swinir/train_swinir_denoising_gray.json --dist True
51
+
52
+ # 005 Color Image Denoising (middle size)
53
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_psnr.py --opt options/swinir/train_swinir_denoising_color.json --dist True
54
+
55
+ # 006 JPEG Compression Artifact Reduction (middle size)
56
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_psnr.py --opt options/swinir/train_swinir_car_jpeg.json --dist True
57
+ ```
58
+
59
+ You can also train the above models using `DataParallel` as follows, but it will be slower.
60
+ ```python
61
+ # 001 Classical Image SR (middle size)
62
+ python main_train_psnr.py --opt options/swinir/train_swinir_sr_classical.json
63
+
64
+ ...
65
+ ```
66
+
67
+
68
+ Note:
69
+
70
+ 1, We fine-tune X3/X4/X8 (or noise=25/50, or JPEG=10/20/30) models from the X2 (or noise=15, or JPEG=40) model, so that `total_iteration` can be halved to save training time. In this case, we halve the initial learning rate and `lr_milestones` accordingly (see the sketch after these notes). This approach achieves performance similar to training from scratch.
71
+
72
+ 2, For SR, we use different kinds of `Upsampler` in classical/lightweight/real-world image SR for the purpose of fair comparison with existing works.
73
+
74
+ 3, We did not re-train the models after cleaning the codes. Feel free to open an issue if you meet any problems.
75
+
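+ A hedged illustration of note 1 (the keys and numbers are placeholders, not the defaults shipped in `options/swinir/`): if the from-scratch schedule used a learning rate of `2e-4` with milestones `[250000, 400000, 450000, 475000, 500000]`, the fine-tuning run would simply halve both:
+ 
+ ```json
+ {
+   "G_optimizer_lr": 1e-4,
+   "G_scheduler_milestones": [125000, 200000, 225000, 237500, 250000]
+ }
+ ```
+ 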
76
+ ## Testing
77
+ The following commands will download the [pretrained models](https://github.com/JingyunLiang/SwinIR/releases/tag/v0.0) and put them in `model_zoo/swinir`. All visual results of SwinIR can be downloaded [here](https://github.com/JingyunLiang/SwinIR/releases/tag/v0.0).
78
+
79
+ If you are too lazy to prepare the datasets, please follow the guide in the [original project page](https://github.com/JingyunLiang/SwinIR#testing-without-preparing-datasets), where you can start testing in a minute. We also provide an [online Colab demo for real-world image SR <a href="https://colab.research.google.com/gist/JingyunLiang/a5e3e54bc9ef8d7bf594f6fee8208533/swinir-demo-on-real-world-image-sr.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="google colab logo"></a>](https://colab.research.google.com/gist/JingyunLiang/a5e3e54bc9ef8d7bf594f6fee8208533/swinir-demo-on-real-world-image-sr.ipynb) for comparison with [the first practical degradation model BSRGAN (ICCV2021) ![GitHub Stars](https://img.shields.io/github/stars/cszn/BSRGAN?style=social)](https://github.com/cszn/BSRGAN) and a recent model [RealESRGAN](https://github.com/xinntao/Real-ESRGAN). Try to test your own images on Colab!
80
+
81
+ ```bash
82
+ # 001 Classical Image Super-Resolution (middle size)
83
+ # Note that --training_patch_size is just used to differentiate two different settings in Table 2 of the paper. Images are NOT tested patch by patch.
84
+ # (setting1: when model is trained on DIV2K and with training_patch_size=48)
85
+ python main_test_swinir.py --task classical_sr --scale 2 --training_patch_size 48 --model_path model_zoo/swinir/001_classicalSR_DIV2K_s48w8_SwinIR-M_x2.pth --folder_lq testsets/set5/LR_bicubic/X2 --folder_gt testsets/set5/HR
86
+ python main_test_swinir.py --task classical_sr --scale 3 --training_patch_size 48 --model_path model_zoo/swinir/001_classicalSR_DIV2K_s48w8_SwinIR-M_x3.pth --folder_lq testsets/set5/LR_bicubic/X3 --folder_gt testsets/set5/HR
87
+ python main_test_swinir.py --task classical_sr --scale 4 --training_patch_size 48 --model_path model_zoo/swinir/001_classicalSR_DIV2K_s48w8_SwinIR-M_x4.pth --folder_lq testsets/set5/LR_bicubic/X4 --folder_gt testsets/set5/HR
88
+ python main_test_swinir.py --task classical_sr --scale 8 --training_patch_size 48 --model_path model_zoo/swinir/001_classicalSR_DIV2K_s48w8_SwinIR-M_x8.pth --folder_lq testsets/set5/LR_bicubic/X8 --folder_gt testsets/set5/HR
89
+
90
+ # (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
91
+ python main_test_swinir.py --task classical_sr --scale 2 --training_patch_size 64 --model_path model_zoo/swinir/001_classicalSR_DF2K_s64w8_SwinIR-M_x2.pth --folder_lq testsets/set5/LR_bicubic/X2 --folder_gt testsets/set5/HR
92
+ python main_test_swinir.py --task classical_sr --scale 3 --training_patch_size 64 --model_path model_zoo/swinir/001_classicalSR_DF2K_s64w8_SwinIR-M_x3.pth --folder_lq testsets/set5/LR_bicubic/X3 --folder_gt testsets/set5/HR
93
+ python main_test_swinir.py --task classical_sr --scale 4 --training_patch_size 64 --model_path model_zoo/swinir/001_classicalSR_DF2K_s64w8_SwinIR-M_x4.pth --folder_lq testsets/set5/LR_bicubic/X4 --folder_gt testsets/set5/HR
94
+ python main_test_swinir.py --task classical_sr --scale 8 --training_patch_size 64 --model_path model_zoo/swinir/001_classicalSR_DF2K_s64w8_SwinIR-M_x8.pth --folder_lq testsets/set5/LR_bicubic/X8 --folder_gt testsets/set5/HR
95
+
96
+
97
+ # 002 Lightweight Image Super-Resolution (small size)
98
+ python main_test_swinir.py --task lightweight_sr --scale 2 --model_path model_zoo/swinir/002_lightweightSR_DIV2K_s64w8_SwinIR-S_x2.pth --folder_lq testsets/set5/LR_bicubic/X2 --folder_gt testsets/set5/HR
99
+ python main_test_swinir.py --task lightweight_sr --scale 3 --model_path model_zoo/swinir/002_lightweightSR_DIV2K_s64w8_SwinIR-S_x3.pth --folder_lq testsets/set5/LR_bicubic/X3 --folder_gt testsets/set5/HR
100
+ python main_test_swinir.py --task lightweight_sr --scale 4 --model_path model_zoo/swinir/002_lightweightSR_DIV2K_s64w8_SwinIR-S_x4.pth --folder_lq testsets/set5/LR_bicubic/X4 --folder_gt testsets/set5/HR
101
+
102
+
103
+ # 003 Real-World Image Super-Resolution (use --tile 400 if you run out-of-memory)
104
+ # (middle size)
105
+ python main_test_swinir.py --task real_sr --scale 4 --model_path model_zoo/swinir/003_realSR_BSRGAN_DFO_s64w8_SwinIR-M_x4_GAN.pth --folder_lq testsets/RealSRSet+5images
106
+
107
+ # (larger size + trained on more datasets)
108
+ python main_test_swinir.py --task real_sr --scale 4 --large_model --model_path model_zoo/swinir/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth --folder_lq testsets/RealSRSet+5images
109
+
110
+
111
+ # 004 Grayscale Image Denoising (middle size)
112
+ python main_test_swinir.py --task gray_dn --noise 15 --model_path model_zoo/swinir/004_grayDN_DFWB_s128w8_SwinIR-M_noise15.pth --folder_gt testsets/set12
113
+ python main_test_swinir.py --task gray_dn --noise 25 --model_path model_zoo/swinir/004_grayDN_DFWB_s128w8_SwinIR-M_noise25.pth --folder_gt testsets/set12
114
+ python main_test_swinir.py --task gray_dn --noise 50 --model_path model_zoo/swinir/004_grayDN_DFWB_s128w8_SwinIR-M_noise50.pth --folder_gt testsets/set12
115
+
116
+
117
+ # 005 Color Image Denoising (middle size)
118
+ python main_test_swinir.py --task color_dn --noise 15 --model_path model_zoo/swinir/005_colorDN_DFWB_s128w8_SwinIR-M_noise15.pth --folder_gt testsets/McMaster
119
+ python main_test_swinir.py --task color_dn --noise 25 --model_path model_zoo/swinir/005_colorDN_DFWB_s128w8_SwinIR-M_noise25.pth --folder_gt testsets/McMaster
120
+ python main_test_swinir.py --task color_dn --noise 50 --model_path model_zoo/swinir/005_colorDN_DFWB_s128w8_SwinIR-M_noise50.pth --folder_gt testsets/McMaster
121
+
122
+
123
+ # 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
124
+ python main_test_swinir.py --task jpeg_car --jpeg 10 --model_path model_zoo/swinir/006_CAR_DFWB_s126w7_SwinIR-M_jpeg10.pth --folder_gt testsets/classic5
125
+ python main_test_swinir.py --task jpeg_car --jpeg 20 --model_path model_zoo/swinir/006_CAR_DFWB_s126w7_SwinIR-M_jpeg20.pth --folder_gt testsets/classic5
126
+ python main_test_swinir.py --task jpeg_car --jpeg 30 --model_path model_zoo/swinir/006_CAR_DFWB_s126w7_SwinIR-M_jpeg30.pth --folder_gt testsets/classic5
127
+ python main_test_swinir.py --task jpeg_car --jpeg 40 --model_path model_zoo/swinir/006_CAR_DFWB_s126w7_SwinIR-M_jpeg40.pth --folder_gt testsets/classic5
128
+ ```
129
+
130
+ ---
131
+
132
+ ## Results
133
+ <details>
134
+ <summary>Classical Image Super-Resolution (click me)</summary>
135
+ <p align="center">
136
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/classic_image_sr.png">
137
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/classic_image_sr_visual.png">
138
+ </p>
139
+ </details>
140
+
141
+ <details>
142
+ <summary>Lightweight Image Super-Resolution</summary>
143
+ <p align="center">
144
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/lightweight_image_sr.png">
145
+ </p>
146
+ </details>
147
+
148
+ <details>
149
+ <summary>Real-World Image Super-Resolution</summary>
150
+ <p align="center">
151
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/real_world_image_sr.png">
152
+ </p>
153
+ </details>
154
+
155
+
156
+ |&nbsp;&nbsp;&nbsp; Real-World Image (x4)|[BSRGAN, ICCV2021](https://github.com/cszn/BSRGAN)|[Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN)|SwinIR (ours)|
157
+ | :--- | :---: | :-----: | :-----: |
158
+ |<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/ETH_LR.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/ETH_BSRGAN.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/ETH_realESRGAN.jpg">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/ETH_SwinIR.png">
159
+ |<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/OST_009_crop_LR.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/OST_009_crop_BSRGAN.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/OST_009_crop_realESRGAN.png">|<img width="200" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/OST_009_crop_SwinIR.png">|
160
+
161
+ <details>
162
+ <summary>Grayscale Image Denoising</summary>
163
+ <p align="center">
164
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/gray_image_denoising.png">
165
+ </p>
166
+ </details>
167
+
168
+ <details>
169
+ <summary>Color Image Denoising</summary>
170
+ <p align="center">
171
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/color_image_denoising.png">
172
+ </p>
173
+ </details>
174
+
175
+ <details>
176
+ <summary>JPEG Compression Artifact Reduction</summary>
177
+ <p align="center">
178
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/SwinIR/main/figs/jepg_compress_artfact_reduction.png">
179
+ </p>
180
+ </details>
181
+
182
+
183
+
184
+ Please refer to the [paper](https://arxiv.org/abs/2108.10257) and the [original project page](https://github.com/JingyunLiang/SwinIR)
185
+ for more results.
186
+
187
+
188
+ ## Citation
189
+ @article{liang2021swinir,
190
+ title={SwinIR: Image Restoration Using Swin Transformer},
191
+ author={Liang, Jingyun and Cao, Jiezhang and Sun, Guolei and Zhang, Kai and Van Gool, Luc and Timofte, Radu},
192
+ journal={arXiv preprint arXiv:2108.10257},
193
+ year={2021}
194
+ }
KAIR/docs/README_VRT.md ADDED
@@ -0,0 +1,191 @@
1
+ # [VRT: A Video Restoration Transformer](https://github.com/JingyunLiang/VRT)
2
+ [arxiv](https://arxiv.org/abs/2201.12288)
3
+ **|**
4
+ [supplementary](https://github.com/JingyunLiang/VRT/releases/download/v0.0/VRT_supplementary.pdf)
5
+ **|**
6
+ [pretrained models](https://github.com/JingyunLiang/VRT/releases)
7
+ **|**
8
+ [visual results](https://github.com/JingyunLiang/VRT/releases)
9
+ **|**
10
+ [original project page](https://github.com/JingyunLiang/VRT)
11
+
12
+ [![arXiv](https://img.shields.io/badge/arXiv-Paper-<COLOR>.svg)](https://arxiv.org/abs/2201.12288)
13
+ [![GitHub Stars](https://img.shields.io/github/stars/JingyunLiang/VRT?style=social)](https://github.com/JingyunLiang/VRT)
14
+ [![download](https://img.shields.io/github/downloads/JingyunLiang/VRT/total.svg)](https://github.com/JingyunLiang/VRT/releases)
15
+ ![visitors](https://visitor-badge.glitch.me/badge?page_id=jingyunliang/VRT)
16
+ [ <a href="https://colab.research.google.com/gist/JingyunLiang/deb335792768ad9eb73854a8efca4fe0#file-vrt-demo-on-video-restoration-ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="google colab logo"></a>](https://colab.research.google.com/gist/JingyunLiang/deb335792768ad9eb73854a8efca4fe0#file-vrt-demo-on-video-restoration-ipynb)
17
+
18
+ This is the readme of "VRT: A Video Restoration Transformer"
19
+ ([arxiv](https://arxiv.org/pdf/2201.12288.pdf), [supp](https://github.com/JingyunLiang/VRT/releases/download/v0.0/VRT_supplementary.pdf), [pretrained models](https://github.com/JingyunLiang/VRT/releases), [visual results](https://github.com/JingyunLiang/VRT/releases)). VRT achieves state-of-the-art performance **(up to 2.16dB)** in
20
+ - video SR (REDS, Vimeo90K, Vid4 and UDM10)
21
+ - video deblurring (GoPro, DVD and REDS)
22
+ - video denoising (DAVIS and Set8)
23
+
24
+ <p align="center">
25
+ <a href="https://github.com/JingyunLiang/VRT/releases">
26
+ <img width=30% src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/teaser_vsr.gif"/>
27
+ <img width=30% src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/teaser_vdb.gif"/>
28
+ <img width=30% src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/teaser_vdn.gif"/>
29
+ </a>
30
+ </p>
31
+
32
+ ---
33
+
34
+ > Video restoration (e.g., video super-resolution) aims to restore high-quality frames from low-quality frames. Different from single image restoration, video restoration generally requires to utilize temporal information from multiple adjacent but usually misaligned video frames. Existing deep methods generally tackle with this by exploiting a sliding window strategy or a recurrent architecture, which either is restricted by frame-by-frame restoration or lacks long-range modelling ability. In this paper, we propose a Video Restoration Transformer (VRT) with parallel frame prediction and long-range temporal dependency modelling abilities. More specifically, VRT is composed of multiple scales, each of which consists of two kinds of modules: temporal mutual self attention (TMSA) and parallel warping. TMSA divides the video into small clips, on which mutual attention is applied for joint motion estimation, feature alignment and feature fusion, while self-attention is used for feature extraction. To enable cross-clip interactions, the video sequence is shifted for every other layer. Besides, parallel warping is used to further fuse information from neighboring frames by parallel feature warping. Experimental results on three tasks, including video super-resolution, video deblurring and video denoising, demonstrate that VRT outperforms the state-of-the-art methods by large margins (**up to 2.16 dB**) on nine benchmark datasets.
35
+ <p align="center">
36
+ <img width="800" src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/framework.jpeg">
37
+ </p>
38
+
39
+ #### Contents
40
+
41
+ 1. [Requirements](#Requirements)
42
+ 1. [Quick Testing](#Quick-Testing)
43
+ 1. [Training](#Training)
44
+ 1. [Results](#Results)
45
+ 1. [Citation](#Citation)
46
+ 1. [License and Acknowledgement](#License-and-Acknowledgement)
47
+
48
+
49
+ ## Requirements
50
+ > - Python 3.8, PyTorch >= 1.9.1
51
+ > - Requirements: see requirements.txt
52
+ > - Platforms: Ubuntu 18.04, cuda-11.1
53
+
54
+ ## Quick Testing
55
+ The following commands will download the [pretrained models](https://github.com/JingyunLiang/VRT/releases) and [test datasets](https://github.com/JingyunLiang/VRT/releases) **automatically** (except the Vimeo-90K testing set). If you run out of memory, try reducing `--tile` at the expense of slightly decreased performance.
56
+
57
+ You can also try to test it on Colab[ <a href="https://colab.research.google.com/gist/JingyunLiang/deb335792768ad9eb73854a8efca4fe0#file-vrt-demo-on-video-restoration-ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="google colab logo"></a>](https://colab.research.google.com/gist/JingyunLiang/deb335792768ad9eb73854a8efca4fe0#file-vrt-demo-on-video-restoration-ipynb), but the results may be slightly different due to `--tile` difference.
58
+ ```bash
59
+ # download code
60
+ git clone https://github.com/JingyunLiang/VRT
61
+ cd VRT
62
+ pip install -r requirements.txt
63
+
64
+ # 001, video sr trained on REDS (6 frames), tested on REDS4
65
+ python main_test_vrt.py --task 001_VRT_videosr_bi_REDS_6frames --folder_lq testsets/REDS4/sharp_bicubic --folder_gt testsets/REDS4/GT --tile 40 128 128 --tile_overlap 2 20 20
66
+
67
+ # 002, video sr trained on REDS (16 frames), tested on REDS4
68
+ python main_test_vrt.py --task 002_VRT_videosr_bi_REDS_16frames --folder_lq testsets/REDS4/sharp_bicubic --folder_gt testsets/REDS4/GT --tile 40 128 128 --tile_overlap 2 20 20
69
+
70
+ # 003, video sr trained on Vimeo (bicubic), tested on Vid4 and Vimeo
71
+ python main_test_vrt.py --task 003_VRT_videosr_bi_Vimeo_7frames --folder_lq testsets/Vid4/BIx4 --folder_gt testsets/Vid4/GT --tile 32 128 128 --tile_overlap 2 20 20
72
+ python main_test_vrt.py --task 003_VRT_videosr_bi_Vimeo_7frames --folder_lq testsets/vimeo90k/vimeo_septuplet_matlabLRx4/sequences --folder_gt testsets/vimeo90k/vimeo_septuplet/sequences --tile 8 0 0 --tile_overlap 0 20 20
73
+
74
+ # 004, video sr trained on Vimeo (blur-downsampling), tested on Vid4, UDM10 and Vimeo
75
+ python main_test_vrt.py --task 004_VRT_videosr_bd_Vimeo_7frames --folder_lq testsets/Vid4/BDx4 --folder_gt testsets/Vid4/GT --tile 32 128 128 --tile_overlap 2 20 20
76
+ python main_test_vrt.py --task 004_VRT_videosr_bd_Vimeo_7frames --folder_lq testsets/UDM10/BDx4 --folder_gt testsets/UDM10/GT --tile 32 128 128 --tile_overlap 2 20 20
77
+ python main_test_vrt.py --task 004_VRT_videosr_bd_Vimeo_7frames --folder_lq testsets/vimeo90k/vimeo_septuplet_BDLRx4/sequences --folder_gt testsets/vimeo90k/vimeo_septuplet/sequences --tile 8 0 0 --tile_overlap 0 20 20
78
+
79
+ # 005, video deblurring trained and tested on DVD
80
+ python main_test_vrt.py --task 005_VRT_videodeblurring_DVD --folder_lq testsets/DVD10/test_GT_blurred --folder_gt testsets/DVD10/test_GT --tile 12 256 256 --tile_overlap 2 20 20
81
+
82
+ # 006, video deblurring trained and tested on GoPro
83
+ python main_test_vrt.py --task 006_VRT_videodeblurring_GoPro --folder_lq testsets/GoPro11/test_GT_blurred --folder_gt testsets/GoPro11/test_GT --tile 18 192 192 --tile_overlap 2 20 20
84
+
85
+ # 007, video deblurring trained on REDS, tested on REDS4
86
+ python main_test_vrt.py --task 007_VRT_videodeblurring_REDS --folder_lq testsets/REDS4/blur --folder_gt testsets/REDS4/GT --tile 12 256 256 --tile_overlap 2 20 20
87
+
88
+ # 008, video denoising trained on DAVIS (noise level 0-50) and tested on Set8 and DAVIS
89
+ python main_test_vrt.py --task 008_VRT_videodenoising_DAVIS --sigma 10 --folder_lq testsets/Set8 --folder_gt testsets/Set8 --tile 12 256 256 --tile_overlap 2 20 20
90
+ python main_test_vrt.py --task 008_VRT_videodenoising_DAVIS --sigma 10 --folder_lq testsets/DAVIS-test --folder_gt testsets/DAVIS-test --tile 12 256 256 --tile_overlap 2 20 20
91
+
92
+ # test on your own datasets (an example)
93
+ python main_test_vrt.py --task 001_VRT_videosr_bi_REDS_6frames --folder_lq testsets/your/own --tile 40 128 128 --tile_overlap 2 20 20
94
+ ```
95
+
96
+ **All visual results of VRT can be downloaded [here](https://github.com/JingyunLiang/VRT/releases)**.
97
+
98
+
99
+ ## Training
100
+ The training and testing sets are as follows (see the [supplementary](https://github.com/JingyunLiang/VRT/releases) for a detailed introduction of all datasets). For better I/O speed, use commands like `python scripts/data_preparation/create_lmdb.py --dataset reds` to convert `.png` datasets to `.lmdb` datasets.
101
+
102
+ Note: You do **NOT need** to prepare the datasets if you just want to test the model. `main_test_vrt.py` will download the testing set automatically.
103
+
104
+
105
+ | Task | Training Set | Testing Set | Pretrained Model and Visual Results of VRT |
106
+ |:--------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| :---: |
107
+ | video SR (setting 1, BI) | [REDS sharp & sharp_bicubic](https://seungjunnah.github.io/Datasets/reds.html) (266 videos, 266000 frames: train + val except REDS4) <br /><br /> *Use [regroup_reds_dataset.py](https://github.com/cszn/KAIR/tree/master/scripts/data_preparation/regroup_reds_dataset.py) to regroup and rename REDS val set | REDS4 (4 videos, 400 frames: 000, 011, 015, 020 of REDS) | [here](https://github.com/JingyunLiang/VRT/releases) |
108
+ | video SR (setting 2 & 3, BI & BD) | [Vimeo90K](http://data.csail.mit.edu/tofu/dataset/vimeo_septuplet.zip) (64612 seven-frame videos as in `sep_trainlist.txt`) <br /><br /> * Use [generate_LR_Vimeo90K.m](https://github.com/cszn/KAIR/tree/master/scripts/matlab_scripts/generate_LR_Vimeo90K.m) and [generate_LR_Vimeo90K_BD.m](https://github.com/cszn/KAIR/tree/master/scripts/matlab_scripts/generate_LR_Vimeo90K_BD.m) to generate LR frames for bicubic and blur-downsampling VSR, respectively. | Vimeo90K-T (the rest 7824 7-frame videos) + [Vid4](https://drive.google.com/file/d/1ZuvNNLgR85TV_whJoHM7uVb-XW1y70DW/view) (4 videos) + [UDM10](https://www.terabox.com/web/share/link?surl=LMuQCVntRegfZSxn7s3hXw&path=%2Fproject%2Fpfnl) (10 videos) <br /><br /> *Use [prepare_UDM10.py](https://github.com/cszn/KAIR/tree/master/scripts/data_preparation/prepare_UDM10.py) to regroup and rename the UDM10 dataset | [here](https://github.com/JingyunLiang/VRT/releases) |
109
+ | video deblurring (setting 1, motion blur) | [DVD](http://www.cs.ubc.ca/labs/imager/tr/2017/DeepVideoDeblurring/DeepVideoDeblurring_Dataset.zip) (61 videos, 5708 frames) <br /><br /> *Use [prepare_DVD.py](https://github.com/cszn/KAIR/tree/master/scripts/data_preparation/prepare_DVD.py) to regroup and rename the dataset. | DVD (10 videos, 1000 frames) <br /><br /> *Use [evaluate_video_deblurring.m](https://github.com/cszn/KAIR/tree/master/scripts/matlab_scripts/evaluate_video_deblurring.m) for final evaluation. | [here](https://github.com/JingyunLiang/VRT/releases) |
110
+ | video deblurring (setting 2, motion blur) | [GoPro](http://data.cv.snu.ac.kr:8008/webdav/dataset/GOPRO/GOPRO_Large.zip) (22 videos, 2103 frames) <br /><br /> *Use [prepare_GoPro_as_video.py](https://github.com/cszn/KAIR/tree/master/scripts/data_preparation/prepare_GoPro_as_video.py) to regroup and rename the dataset. | GoPro (11 videos, 1111 frames) <br /><br /> *Use [evaluate_video_deblurring.m](https://github.com/cszn/KAIR/tree/master/scripts/matlab_scripts/evaluate_video_deblurring.m) for final evaluation. | [here](https://github.com/JingyunLiang/VRT/releases) |
111
+ | video deblurring (setting 3, motion blur) | [REDS sharp & blur](https://seungjunnah.github.io/Datasets/reds.html) (266 videos, 266000 frames: train & val except REDS4) <br /><br /> *Use [regroup_reds_dataset.py](https://github.com/cszn/KAIR/tree/master/scripts/data_preparation/regroup_reds_dataset.py) to regroup and rename REDS val set. Note that it shares the same HQ frames as in VSR. | REDS4 (4 videos, 400 frames: 000, 011, 015, 020 of REDS) | [here](https://github.com/JingyunLiang/VRT/releases) |
112
+ | video denoising (Gaussian noise) | [DAVIS-2017](https://data.vision.ee.ethz.ch/csergi/share/davis/DAVIS-2017-Unsupervised-trainval-480p.zip) (90 videos, 6208 frames) <br /><br /> *Use all files in DAVIS/JPEGImages/480p | [DAVIS-2017-test](https://github.com/JingyunLiang/VRT/releases) (30 videos) + [Set8](https://www.dropbox.com/sh/20n4cscqkqsfgoj/AABGftyJuJDwuCLGczL-fKvBa/test_sequences?dl=0&subfolder_nav_tracking=1) (8 videos: tractor, touchdown, park_joy and sunflower selected from DERF + hypersmooth, motorbike, rafting and snowboard from GOPRO_540P) | [here](https://github.com/JingyunLiang/VRT/releases) |
113
+
114
+ Run the following commands for training:
115
+ ```bash
116
+ # download code
117
+ git clone https://github.com/cszn/KAIR
118
+ cd KAIR
119
+ pip install -r requirements.txt
120
+
121
+ # 001, video sr trained on REDS (6 frames), tested on REDS4
122
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_vrt.py --opt options/vrt/001_train_vrt_videosr_bi_reds_6frames.json --dist True
123
+
124
+ # 002, video sr trained on REDS (16 frames), tested on REDS4
125
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_vrt.py --opt options/vrt/002_train_vrt_videosr_bi_reds_16frames.json --dist True
126
+
127
+ # 003, video sr trained on Vimeo (bicubic), tested on Vid4 and Vimeo
128
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_vrt.py --opt options/vrt/003_train_vrt_videosr_bi_vimeo_7frames.json --dist True
129
+
130
+ # 004, video sr trained on Vimeo (blur-downsampling), tested on Vid4, Vimeo and UDM10
131
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_vrt.py --opt options/vrt/004_train_vrt_videosr_bd_vimeo_7frames.json --dist True
132
+
133
+ # 005, video deblurring trained and tested on DVD
134
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_vrt.py --opt options/vrt/005_train_vrt_videodeblurring_dvd.json --dist True
135
+
136
+ # 006, video deblurring trained and tested on GoPro
137
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_vrt.py --opt options/vrt/006_train_vrt_videodeblurring_gopro.json --dist True
138
+
139
+ # 007, video deblurring trained on REDS, tested on REDS4
140
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_vrt.py --opt options/vrt/007_train_vrt_videodeblurring_reds.json --dist True
141
+
142
+ # 008, video denoising trained on DAVIS (noise level 0-50) and tested on Set8 and DAVIS
143
+ python -m torch.distributed.launch --nproc_per_node=8 --master_port=1234 main_train_vrt.py --opt options/vrt/008_train_vrt_videodenoising_davis.json --dist True
144
+ ```
145
+ Tip: The training process will terminate automatically at iteration 20,000 due to a bug. Just resume training after that.
146
+ <details>
147
+ <summary>Bug</summary>
148
+ Bug: PyTorch DistributedDataParallel (DDP) does not support `torch.utils.checkpoint` well. To alleviate the problem, set `find_unused_parameters=False` when `use_checkpoint=True`. If there are other errors, make sure that unused parameters will not change during the training loop and set `use_static_graph=True` (see the sketch below this note).
149
+
150
+ If you find a better solution, feel free to open a pull request. Thank you.
151
+ </details>
152
+
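+ For reference, a minimal sketch of how these two flags map onto a standard PyTorch DDP setup on recent PyTorch versions; `build_model` and `local_rank` are placeholders (not names from this repository), and the process group is assumed to be initialized by the launcher already:
+ 
+ ```python
+ from torch.nn.parallel import DistributedDataParallel as DDP
+ 
+ # `build_model` and `local_rank` are placeholders; the process group is assumed
+ # to be initialized (e.g. by torch.distributed.launch as in the commands above).
+ model = build_model().cuda(local_rank)
+ 
+ # With use_checkpoint=True, keep find_unused_parameters=False to avoid the
+ # DDP / torch.utils.checkpoint incompatibility described in the note.
+ model = DDP(model, device_ids=[local_rank], find_unused_parameters=False)
+ 
+ # If unused parameters never change during training, declare the graph static so
+ # DDP skips the unused-parameter search (the role of use_static_graph=True in the options).
+ model._set_static_graph()
+ ```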
153
+ ## Results
154
+ We achieved state-of-the-art performance on video SR, video deblurring and video denoising. Detailed results can be found in the [paper](https://arxiv.org/abs/2201.12288).
155
+
156
+ <details>
157
+ <summary>Video Super-Resolution (click me)</summary>
158
+ <p align="center">
159
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/vsr.jpeg">
160
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/vsr_visual.jpeg">
161
+ </p>
162
+ </details>
163
+
164
+ <details>
165
+ <summary>Video Deblurring</summary>
166
+ <p align="center">
167
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/vdb_dvd_gopro.jpeg">
168
+ <img width="900" src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/vdb_visual.jpeg">
169
+ <img width="350" src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/vdb_reds.jpeg">
170
+ </p>
171
+ </details>
172
+
173
+ <details>
174
+ <summary>Video Denoising</summary>
175
+ <p align="center">
176
+ <img width="350" src="https://raw.githubusercontent.com/JingyunLiang/VRT/main/assets/vdn.jpeg">
177
+ </p>
178
+ </details>
179
+
180
+
181
+ ## Citation
182
+ @article{liang2022vrt,
183
+ title={VRT: A Video Restoration Transformer},
184
+ author={Liang, Jingyun and Cao, Jiezhang and Fan, Yuchen and Zhang, Kai and Ranjan, Rakesh and Li, Yawei and Timofte, Radu and Van Gool, Luc},
185
+ journal={arXiv preprint arXiv:2201.12288},
186
+ year={2022}
187
+ }
188
+
189
+
190
+ ## License and Acknowledgement
191
+ This project is released under the CC-BY-NC license. We refer to codes from [KAIR](https://github.com/cszn/KAIR), [BasicSR](https://github.com/xinntao/BasicSR), [Video Swin Transformer](https://github.com/SwinTransformer/Video-Swin-Transformer) and [mmediting](https://github.com/open-mmlab/mmediting). Thanks for their awesome works. The majority of VRT is licensed under CC-BY-NC, however portions of the project are available under separate license terms: KAIR is licensed under the MIT License, BasicSR, Video Swin Transformer and mmediting are licensed under the Apache 2.0 license.
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_095438.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": null,
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "trainsets/REDS/train_sharp_with_val.lmdb",
35
+ "dataroot_lq": "trainsets/REDS/train_sharp_bicubic_with_val.lmdb",
36
+ "meta_info_file": "data/meta_info/meta_info_REDS_GT.txt",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "lmdb"
43
+ },
44
+ "num_frame": 6,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "testsets/REDS4/GT",
63
+ "dataroot_lq": "testsets/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_095450.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": null,
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "trainsets/REDS/train_sharp_with_val.lmdb",
35
+ "dataroot_lq": "trainsets/REDS/train_sharp_bicubic_with_val.lmdb",
36
+ "meta_info_file": "data/meta_info/meta_info_REDS_GT.txt",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "lmdb"
43
+ },
44
+ "num_frame": 6,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "testsets/REDS4/GT",
63
+ "dataroot_lq": "testsets/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
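
`G_lossfn_type` is `charbonnier` with `G_charbonnier_eps` of 1e-09. The Charbonnier loss is a smoothed L1, commonly written as sqrt((x - y)^2 + eps^2) averaged over all elements; the sketch below uses that common formulation with the eps from the config and is not necessarily KAIR's exact implementation (some codebases add eps rather than eps squared under the root).

```python
# Hedged sketch of a Charbonnier (smooth L1) loss using G_charbonnier_eps above.
import torch

def charbonnier_loss(pred, target, eps=1e-9):
    # sqrt((x - y)^2 + eps^2), averaged over all elements
    return torch.sqrt((pred - target) ** 2 + eps ** 2).mean()

pred = torch.rand(1, 3, 64, 64)
target = torch.rand(1, 3, 64, 64)
print(charbonnier_loss(pred, target))
```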
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_095518.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": null,
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "trainsets/REDS/train_sharp_with_val.lmdb",
35
+ "dataroot_lq": "trainsets/REDS/train_sharp_bicubic_with_val.lmdb",
36
+ "meta_info_file": "data/meta_info/meta_info_REDS_GT.txt",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "lmdb"
43
+ },
44
+ "num_frame": 6,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "testsets/REDS4/GT",
63
+ "dataroot_lq": "testsets/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
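
The REDS train dataset entries use `filename_tmpl` of `08d` and `filename_ext` of `png`, i.e. frame indices zero-padded to eight digits. The snippet below illustrates how such a template is typically expanded into frame filenames; the exact expansion point inside the dataset class is an assumption, not a quote of the loader code.

```python
# Illustration (assumed usage) of expanding a "08d" filename template with a
# "png" extension into frame filenames.
filename_tmpl = "08d"
filename_ext = "png"

def frame_name(idx):
    return f"{idx:{filename_tmpl}}.{filename_ext}"

print(frame_name(0))   # 00000000.png
print(frame_name(42))  # 00000042.png
```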
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_101636.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": null,
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "/home/cll/datasets/REDS/val/val_sharp",
35
+ "dataroot_lq": "/home/cll/datasets/REDS/val/val_sharp_bicubic",
36
+ "meta_info_file": "",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "disk"
43
+ },
44
+ "num_frame": 6,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "/home/cll/Desktop/REDS4/GT",
63
+ "dataroot_lq": "/home/cll/Desktop/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
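
Every config here keeps `fix_iter` 20000, `fix_lr_mul` 0.125 and `fix_keys` ["spynet", "deform"]: parameters whose names match those keys are held back at the start of training and then trained at a reduced learning rate. The sketch below shows one way to build such parameter groups by name matching; it is an illustration of the idea under that assumption, not the repository's exact logic.

```python
# Sketch (assumed behaviour, not the exact KAIR code): split parameters by the
# fix_keys above so flow/alignment modules can be frozen for fix_iter steps and
# later trained at fix_lr_mul x the base learning rate.
import torch

fix_keys = ["spynet", "deform"]
base_lr, fix_lr_mul = 4e-4, 0.125

class Toy(torch.nn.Module):          # placeholder with a "deform"-named submodule
    def __init__(self):
        super().__init__()
        self.backbone = torch.nn.Conv2d(3, 16, 3)
        self.deform_head = torch.nn.Conv2d(16, 3, 3)

model = Toy()
fixed, normal = [], []
for name, p in model.named_parameters():
    (fixed if any(k in name for k in fix_keys) else normal).append(p)

optimizer = torch.optim.Adam(
    [{"params": normal, "lr": base_lr},
     {"params": fixed, "lr": base_lr * fix_lr_mul}],
    betas=(0.9, 0.99),
)
# During the first fix_iter = 20000 steps one would additionally set
# requires_grad = False on the "fixed" group (or zero its learning rate).
```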
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_101949.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": "/home/cll/dev/KAIR/model_zoo/vrt/",
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "/home/cll/datasets/REDS/val/val_sharp",
35
+ "dataroot_lq": "/home/cll/datasets/REDS/val/val_sharp_bicubic",
36
+ "meta_info_file": "",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "disk"
43
+ },
44
+ "num_frame": 6,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "/home/cll/Desktop/REDS4/GT",
63
+ "dataroot_lq": "/home/cll/Desktop/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_102114.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": "/home/cll/dev/KAIR/model_zoo/vrt/",
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "/home/cll/datasets/REDS/val/val_sharp",
35
+ "dataroot_lq": "/home/cll/datasets/REDS/val/val_sharp_bicubic",
36
+ "meta_info_file": "data/meta_info/meta_info_REDS_GT.txt",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "disk"
43
+ },
44
+ "num_frame": 6,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "/home/cll/Desktop/REDS4/GT",
63
+ "dataroot_lq": "/home/cll/Desktop/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_102214.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": "/home/cll/dev/KAIR/model_zoo/vrt/001_VRT_videosr_bi_REDS_6frames.pth",
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "/home/cll/datasets/REDS/val/val_sharp",
35
+ "dataroot_lq": "/home/cll/datasets/REDS/val/val_sharp_bicubic",
36
+ "meta_info_file": "data/meta_info/meta_info_REDS_GT.txt",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "disk"
43
+ },
44
+ "num_frame": 6,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "/home/cll/Desktop/REDS4/GT",
63
+ "dataroot_lq": "/home/cll/Desktop/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
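
This run points `pretrained_netG` at `001_VRT_videosr_bi_REDS_6frames.pth` instead of a bare directory as in the previous option dump. A plain-PyTorch way to resume from such a checkpoint is sketched below; the `"params"` wrapper key is an assumption (released checkpoints are sometimes stored that way), and `strict` corresponds to `G_param_strict` above.

```python
# Hedged sketch of loading pretrained_netG; the 'params' unwrapping is an
# assumption about the checkpoint layout, not a statement about this file.
import torch

def load_pretrained(net, path, strict=True):
    state = torch.load(path, map_location="cpu")
    if isinstance(state, dict) and "params" in state:  # unwrap if wrapped
        state = state["params"]
    net.load_state_dict(state, strict=strict)
    return net
```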
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_104612.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": "/home/cll/dev/KAIR/model_zoo/vrt/001_VRT_videosr_bi_REDS_6frames.pth",
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "/home/cll/datasets/REDS/train/train_sharp",
35
+ "dataroot_lq": "/home/cll/datasets/REDS/train/train_sharp_bicubic/X4",
36
+ "meta_info_file": "data/meta_info/meta_info_REDS_GT.txt",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "disk"
43
+ },
44
+ "num_frame": 6,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "/home/cll/Desktop/REDS4/GT",
63
+ "dataroot_lq": "/home/cll/Desktop/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_105219.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": "/home/cll/dev/KAIR/model_zoo/vrt/001_VRT_videosr_bi_REDS_6frames.pth",
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "/home/cll/datasets/REDS/train/train_sharp",
35
+ "dataroot_lq": "/home/cll/datasets/REDS/train/train_sharp_bicubic/X4",
36
+ "meta_info_file": "data/meta_info/meta_info_REDS_GT.txt",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "disk"
43
+ },
44
+ "num_frame": 6,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "/home/cll/Desktop/REDS4/GT",
63
+ "dataroot_lq": "/home/cll/Desktop/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
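
With `scale` 4 and `gt_size` 256, each training sample pairs a 256x256 ground-truth crop with the co-located 64x64 low-quality crop (gt_size / scale). The snippet below shows that cropping arithmetic on dummy tensors; it illustrates the size relationship only, not the dataset's actual augmentation code.

```python
# Illustrative GT/LQ crop pairing for scale = 4 and gt_size = 256.
import torch

scale, gt_size = 4, 256
lq_size = gt_size // scale                    # 64

lq = torch.rand(3, 180, 320)                  # e.g. a low-quality frame
gt = torch.rand(3, 180 * scale, 320 * scale)  # matching ground-truth frame

top, left = 10, 20                            # crop origin in LQ coordinates
lq_patch = lq[:, top:top + lq_size, left:left + lq_size]
gt_patch = gt[:, top * scale:top * scale + gt_size,
                 left * scale:left * scale + gt_size]

print(lq_patch.shape, gt_patch.shape)         # (3, 64, 64) and (3, 256, 256)
```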
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_105304.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": "/home/cll/dev/KAIR/model_zoo/vrt/001_VRT_videosr_bi_REDS_6frames.pth",
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "/home/cll/datasets/REDS/train/train_sharp",
35
+ "dataroot_lq": "/home/cll/datasets/REDS/train/train_sharp_bicubic/X4",
36
+ "meta_info_file": "data/meta_info/meta_info_REDS_GT.txt",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "disk"
43
+ },
44
+ "num_frame": 4,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "/home/cll/Desktop/REDS4/GT",
63
+ "dataroot_lq": "/home/cll/Desktop/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 6,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/options/001_train_vrt_videosr_bi_reds_6frames_220311_105340.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "task": "001_train_vrt_videosr_bi_reds_6frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": "/home/cll/dev/KAIR/model_zoo/vrt/001_VRT_videosr_bi_REDS_6frames.pth",
22
+ "pretrained_netE": null,
23
+ "task": "experiments/001_train_vrt_videosr_bi_reds_6frames",
24
+ "log": "experiments/001_train_vrt_videosr_bi_reds_6frames",
25
+ "options": "experiments/001_train_vrt_videosr_bi_reds_6frames/options",
26
+ "models": "experiments/001_train_vrt_videosr_bi_reds_6frames/models",
27
+ "images": "experiments/001_train_vrt_videosr_bi_reds_6frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainDataset",
34
+ "dataroot_gt": "/home/cll/datasets/REDS/train/train_sharp",
35
+ "dataroot_lq": "/home/cll/datasets/REDS/train/train_sharp_bicubic/X4",
36
+ "meta_info_file": "data/meta_info/meta_info_REDS_GT.txt",
37
+ "filename_tmpl": "08d",
38
+ "filename_ext": "png",
39
+ "val_partition": "REDS4",
40
+ "test_mode": false,
41
+ "io_backend": {
42
+ "type": "disk"
43
+ },
44
+ "num_frame": 4,
45
+ "gt_size": 256,
46
+ "interval_list": [
47
+ 1
48
+ ],
49
+ "random_reverse": false,
50
+ "use_hflip": true,
51
+ "use_rot": true,
52
+ "dataloader_shuffle": true,
53
+ "dataloader_num_workers": 32,
54
+ "dataloader_batch_size": 8,
55
+ "phase": "train",
56
+ "scale": 4,
57
+ "n_channels": 3
58
+ },
59
+ "test": {
60
+ "name": "test_dataset",
61
+ "dataset_type": "VideoRecurrentTestDataset",
62
+ "dataroot_gt": "/home/cll/Desktop/REDS4/GT",
63
+ "dataroot_lq": "/home/cll/Desktop/REDS4/sharp_bicubic",
64
+ "cache_data": true,
65
+ "io_backend": {
66
+ "type": "disk"
67
+ },
68
+ "num_frame": -1,
69
+ "phase": "test",
70
+ "scale": 4,
71
+ "n_channels": 3
72
+ }
73
+ },
74
+ "netG": {
75
+ "net_type": "vrt",
76
+ "upscale": 4,
77
+ "img_size": [
78
+ 6,
79
+ 64,
80
+ 64
81
+ ],
82
+ "window_size": [
83
+ 2,
84
+ 8,
85
+ 8
86
+ ],
87
+ "depths": [
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 8,
93
+ 8,
94
+ 8,
95
+ 4,
96
+ 4,
97
+ 4,
98
+ 4,
99
+ 4,
100
+ 4
101
+ ],
102
+ "indep_reconsts": [
103
+ 11,
104
+ 12
105
+ ],
106
+ "embed_dims": [
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 120,
112
+ 120,
113
+ 120,
114
+ 180,
115
+ 180,
116
+ 180,
117
+ 180,
118
+ 180,
119
+ 180
120
+ ],
121
+ "num_heads": [
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6,
132
+ 6,
133
+ 6,
134
+ 6
135
+ ],
136
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
137
+ "pa_frames": 2,
138
+ "deformable_groups": 12,
139
+ "nonblind_denoising": false,
140
+ "use_checkpoint_attn": false,
141
+ "use_checkpoint_ffn": false,
142
+ "no_checkpoint_attn_blocks": [],
143
+ "no_checkpoint_ffn_blocks": [],
144
+ "init_type": "default",
145
+ "scale": 4
146
+ },
147
+ "train": {
148
+ "G_lossfn_type": "charbonnier",
149
+ "G_lossfn_weight": 1.0,
150
+ "G_charbonnier_eps": 1e-09,
151
+ "E_decay": 0,
152
+ "G_optimizer_type": "adam",
153
+ "G_optimizer_lr": 0.0004,
154
+ "G_optimizer_betas": [
155
+ 0.9,
156
+ 0.99
157
+ ],
158
+ "G_optimizer_wd": 0,
159
+ "G_optimizer_clipgrad": null,
160
+ "G_optimizer_reuse": true,
161
+ "fix_iter": 20000,
162
+ "fix_lr_mul": 0.125,
163
+ "fix_keys": [
164
+ "spynet",
165
+ "deform"
166
+ ],
167
+ "total_iter": 300000,
168
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
169
+ "G_scheduler_periods": 300000,
170
+ "G_scheduler_eta_min": 1e-07,
171
+ "G_regularizer_orthstep": null,
172
+ "G_regularizer_clipstep": null,
173
+ "G_param_strict": true,
174
+ "E_param_strict": true,
175
+ "checkpoint_test": 5000,
176
+ "checkpoint_save": 5000,
177
+ "checkpoint_print": 200,
178
+ "F_feature_layer": 34,
179
+ "F_weights": 1.0,
180
+ "F_lossfn_type": "l1",
181
+ "F_use_input_norm": true,
182
+ "F_use_range_norm": false,
183
+ "G_scheduler_restart_weights": 1
184
+ },
185
+ "val": {
186
+ "save_img": false,
187
+ "pad_seq": false,
188
+ "flip_seq": false,
189
+ "center_frame_only": false,
190
+ "num_frame_testing": 40,
191
+ "num_frame_overlapping": 2,
192
+ "size_patch_testing": 128
193
+ },
194
+ "opt_path": "options/vrt/001_train_vrt_videosr_bi_reds_6frames.json",
195
+ "is_train": true,
196
+ "merge_bn": false,
197
+ "merge_bn_startpoint": -1,
198
+ "num_gpu": 8,
199
+ "rank": 0,
200
+ "world_size": 1
201
+ }
KAIR/experiments/001_train_vrt_videosr_bi_reds_6frames/train.log ADDED
The diff for this file is too large to render. See raw diff
 
KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/options/003_train_vrt_videosr_bi_vimeo_7frames_220311_095626.json ADDED
@@ -0,0 +1,198 @@
1
+ {
2
+ "task": "003_train_vrt_videosr_bi_vimeo_7frames",
3
+ "model": "vrt",
4
+ "gpu_ids": [
5
+ 0,
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 6,
12
+ 7
13
+ ],
14
+ "dist": false,
15
+ "find_unused_parameters": false,
16
+ "use_static_graph": true,
17
+ "scale": 4,
18
+ "n_channels": 3,
19
+ "path": {
20
+ "root": "experiments",
21
+ "pretrained_netG": "model_zoo/vrt/002_VRT_videosr_bi_REDS_16frames.pth",
22
+ "pretrained_netE": null,
23
+ "task": "experiments/003_train_vrt_videosr_bi_vimeo_7frames",
24
+ "log": "experiments/003_train_vrt_videosr_bi_vimeo_7frames",
25
+ "options": "experiments/003_train_vrt_videosr_bi_vimeo_7frames/options",
26
+ "models": "experiments/003_train_vrt_videosr_bi_vimeo_7frames/models",
27
+ "images": "experiments/003_train_vrt_videosr_bi_vimeo_7frames/images",
28
+ "pretrained_optimizerG": null
29
+ },
30
+ "datasets": {
31
+ "train": {
32
+ "name": "train_dataset",
33
+ "dataset_type": "VideoRecurrentTrainVimeoDataset",
34
+ "dataroot_gt": "trainsets/vimeo90k",
35
+ "dataroot_lq": "trainsets/vimeo90k",
36
+ "meta_info_file": "data/meta_info/meta_info_Vimeo90K_train_GT.txt",
37
+ "io_backend": {
38
+ "type": "file"
39
+ },
40
+ "num_frame": -1,
41
+ "gt_size": 256,
42
+ "interval_list": [
43
+ 1
44
+ ],
45
+ "random_reverse": true,
46
+ "use_hflip": true,
47
+ "use_rot": true,
48
+ "pad_sequence": true,
49
+ "dataloader_shuffle": true,
50
+ "dataloader_num_workers": 32,
51
+ "dataloader_batch_size": 8,
52
+ "phase": "train",
53
+ "scale": 4,
54
+ "n_channels": 3
55
+ },
56
+ "test": {
57
+ "name": "test_dataset",
58
+ "dataset_type": "VideoRecurrentTestDataset",
59
+ "dataroot_gt": "testsets/Vid4/GT",
60
+ "dataroot_lq": "testsets/Vid4/BIx4",
61
+ "cache_data": true,
62
+ "io_backend": {
63
+ "type": "disk"
64
+ },
65
+ "num_frame": -1,
66
+ "phase": "test",
67
+ "scale": 4,
68
+ "n_channels": 3
69
+ }
70
+ },
71
+ "netG": {
72
+ "net_type": "vrt",
73
+ "upscale": 4,
74
+ "img_size": [
75
+ 8,
76
+ 64,
77
+ 64
78
+ ],
79
+ "window_size": [
80
+ 8,
81
+ 8,
82
+ 8
83
+ ],
84
+ "depths": [
85
+ 8,
86
+ 8,
87
+ 8,
88
+ 8,
89
+ 8,
90
+ 8,
91
+ 8,
92
+ 4,
93
+ 4,
94
+ 4,
95
+ 4,
96
+ 4,
97
+ 4
98
+ ],
99
+ "indep_reconsts": [
100
+ 11,
101
+ 12
102
+ ],
103
+ "embed_dims": [
104
+ 120,
105
+ 120,
106
+ 120,
107
+ 120,
108
+ 120,
109
+ 120,
110
+ 120,
111
+ 180,
112
+ 180,
113
+ 180,
114
+ 180,
115
+ 180,
116
+ 180
117
+ ],
118
+ "num_heads": [
119
+ 6,
120
+ 6,
121
+ 6,
122
+ 6,
123
+ 6,
124
+ 6,
125
+ 6,
126
+ 6,
127
+ 6,
128
+ 6,
129
+ 6,
130
+ 6,
131
+ 6
132
+ ],
133
+ "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
134
+ "pa_frames": 4,
135
+ "deformable_groups": 16,
136
+ "nonblind_denoising": false,
137
+ "use_checkpoint_attn": false,
138
+ "use_checkpoint_ffn": false,
139
+ "no_checkpoint_attn_blocks": [],
140
+ "no_checkpoint_ffn_blocks": [],
141
+ "init_type": "default",
142
+ "scale": 4
143
+ },
144
+ "train": {
145
+ "G_lossfn_type": "charbonnier",
146
+ "G_lossfn_weight": 1.0,
147
+ "G_charbonnier_eps": 1e-09,
148
+ "E_decay": 0,
149
+ "G_optimizer_type": "adam",
150
+ "G_optimizer_lr": 0.0004,
151
+ "G_optimizer_betas": [
152
+ 0.9,
153
+ 0.99
154
+ ],
155
+ "G_optimizer_wd": 0,
156
+ "G_optimizer_clipgrad": null,
157
+ "G_optimizer_reuse": true,
158
+ "fix_iter": 20000,
159
+ "fix_lr_mul": 0.125,
160
+ "fix_keys": [
161
+ "spynet",
162
+ "deform"
163
+ ],
164
+ "total_iter": 300000,
165
+ "G_scheduler_type": "CosineAnnealingWarmRestarts",
166
+ "G_scheduler_periods": 300000,
167
+ "G_scheduler_eta_min": 1e-07,
168
+ "G_regularizer_orthstep": null,
169
+ "G_regularizer_clipstep": null,
170
+ "G_param_strict": false,
171
+ "E_param_strict": true,
172
+ "checkpoint_test": 5000,
173
+ "checkpoint_save": 5000,
174
+ "checkpoint_print": 200,
175
+ "F_feature_layer": 34,
176
+ "F_weights": 1.0,
177
+ "F_lossfn_type": "l1",
178
+ "F_use_input_norm": true,
179
+ "F_use_range_norm": false,
180
+ "G_scheduler_restart_weights": 1
181
+ },
182
+ "val": {
183
+ "save_img": false,
184
+ "pad_seq": false,
185
+ "flip_seq": false,
186
+ "center_frame_only": false,
187
+ "num_frame_testing": 32,
188
+ "num_frame_overlapping": 2,
189
+ "size_patch_testing": 128
190
+ },
191
+ "opt_path": "options/vrt/003_train_vrt_videosr_bi_vimeo_7frames.json",
192
+ "is_train": true,
193
+ "merge_bn": false,
194
+ "merge_bn_startpoint": -1,
195
+ "num_gpu": 8,
196
+ "rank": 0,
197
+ "world_size": 1
198
+ }
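
The Vimeo-90K configs set `pad_sequence` true and a temporal size of 8 (`img_size` [8, 64, 64]) even though Vimeo-90K clips contain 7 frames, which suggests the clip is padded by one frame to match the temporal window. The sketch below shows one simple way to do that by repeating the last frame; the actual padding strategy used by the dataset is an assumption here.

```python
# Sketch (assumed intent of pad_sequence): extend a 7-frame Vimeo-90K clip to
# the 8-frame temporal size implied by img_size above by repeating the last frame.
import torch

clip = torch.rand(7, 3, 64, 64)               # (frames, C, H, W)
padded = torch.cat([clip, clip[-1:]], dim=0)  # repeat the final frame
print(padded.shape)                           # torch.Size([8, 3, 64, 64])
```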
KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/options/003_train_vrt_videosr_bi_vimeo_7frames_220311_101027.json ADDED
@@ -0,0 +1,198 @@
+ {
+   "task": "003_train_vrt_videosr_bi_vimeo_7frames",
+   "model": "vrt",
+   "gpu_ids": [0, 1, 2, 3, 4, 5, 6, 7],
+   "dist": false,
+   "find_unused_parameters": false,
+   "use_static_graph": true,
+   "scale": 4,
+   "n_channels": 3,
+   "path": {
+     "root": "experiments",
+     "pretrained_netG": "model_zoo/vrt/002_VRT_videosr_bi_REDS_16frames.pth",
+     "pretrained_netE": null,
+     "task": "experiments/003_train_vrt_videosr_bi_vimeo_7frames",
+     "log": "experiments/003_train_vrt_videosr_bi_vimeo_7frames",
+     "options": "experiments/003_train_vrt_videosr_bi_vimeo_7frames/options",
+     "models": "experiments/003_train_vrt_videosr_bi_vimeo_7frames/models",
+     "images": "experiments/003_train_vrt_videosr_bi_vimeo_7frames/images",
+     "pretrained_optimizerG": null
+   },
+   "datasets": {
+     "train": {
+       "name": "train_dataset",
+       "dataset_type": "VideoRecurrentTrainVimeoDataset",
+       "dataroot_gt": "trainsets/vimeo90k",
+       "dataroot_lq": "trainsets/vimeo90k",
+       "meta_info_file": "data/meta_info/meta_info_Vimeo90K_train_GT.txt",
+       "io_backend": {"type": "file"},
+       "num_frame": -1,
+       "gt_size": 256,
+       "interval_list": [1],
+       "random_reverse": true,
+       "use_hflip": true,
+       "use_rot": true,
+       "pad_sequence": true,
+       "dataloader_shuffle": true,
+       "dataloader_num_workers": 32,
+       "dataloader_batch_size": 8,
+       "phase": "train",
+       "scale": 4,
+       "n_channels": 3
+     },
+     "test": {
+       "name": "test_dataset",
+       "dataset_type": "VideoRecurrentTestDataset",
+       "dataroot_gt": "testsets/Vid4/GT",
+       "dataroot_lq": "testsets/Vid4/BIx4",
+       "cache_data": true,
+       "io_backend": {"type": "disk"},
+       "num_frame": -1,
+       "phase": "test",
+       "scale": 4,
+       "n_channels": 3
+     }
+   },
+   "netG": {
+     "net_type": "vrt",
+     "upscale": 4,
+     "img_size": [8, 64, 64],
+     "window_size": [8, 8, 8],
+     "depths": [8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4],
+     "indep_reconsts": [11, 12],
+     "embed_dims": [120, 120, 120, 120, 120, 120, 120, 180, 180, 180, 180, 180, 180],
+     "num_heads": [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
+     "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
+     "pa_frames": 4,
+     "deformable_groups": 16,
+     "nonblind_denoising": false,
+     "use_checkpoint_attn": false,
+     "use_checkpoint_ffn": false,
+     "no_checkpoint_attn_blocks": [],
+     "no_checkpoint_ffn_blocks": [],
+     "init_type": "default",
+     "scale": 4
+   },
+   "train": {
+     "G_lossfn_type": "charbonnier",
+     "G_lossfn_weight": 1.0,
+     "G_charbonnier_eps": 1e-09,
+     "E_decay": 0,
+     "G_optimizer_type": "adam",
+     "G_optimizer_lr": 0.0004,
+     "G_optimizer_betas": [0.9, 0.99],
+     "G_optimizer_wd": 0,
+     "G_optimizer_clipgrad": null,
+     "G_optimizer_reuse": true,
+     "fix_iter": 20000,
+     "fix_lr_mul": 0.125,
+     "fix_keys": ["spynet", "deform"],
+     "total_iter": 300000,
+     "G_scheduler_type": "CosineAnnealingWarmRestarts",
+     "G_scheduler_periods": 300000,
+     "G_scheduler_eta_min": 1e-07,
+     "G_regularizer_orthstep": null,
+     "G_regularizer_clipstep": null,
+     "G_param_strict": false,
+     "E_param_strict": true,
+     "checkpoint_test": 5000,
+     "checkpoint_save": 5000,
+     "checkpoint_print": 200,
+     "F_feature_layer": 34,
+     "F_weights": 1.0,
+     "F_lossfn_type": "l1",
+     "F_use_input_norm": true,
+     "F_use_range_norm": false,
+     "G_scheduler_restart_weights": 1
+   },
+   "val": {
+     "save_img": false,
+     "pad_seq": false,
+     "flip_seq": false,
+     "center_frame_only": false,
+     "num_frame_testing": 32,
+     "num_frame_overlapping": 2,
+     "size_patch_testing": 128
+   },
+   "opt_path": "options/vrt/003_train_vrt_videosr_bi_vimeo_7frames.json",
+   "is_train": true,
+   "merge_bn": false,
+   "merge_bn_startpoint": -1,
+   "num_gpu": 8,
+   "rank": 0,
+   "world_size": 1
+ }
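These option snapshots are plain JSON written at the start of each run, so they can be inspected without KAIR's own option parser (which is not part of this diff). A minimal sketch using only the standard library; the snapshot path below is illustrative, not a specific file from this commit:

    import json

    # Path to any of the option snapshots under experiments/.../options/ (illustrative).
    opt_path = "experiments/003_train_vrt_videosr_bi_vimeo_7frames/options/some_snapshot.json"
    with open(opt_path) as f:
        opt = json.load(f)

    # A few of the fields the VRT training script reads from this file.
    print(opt["task"])                                        # experiment name
    print(opt["scale"])                                       # x4 super-resolution
    print(opt["datasets"]["train"]["dataloader_batch_size"])  # 8
    print(opt["netG"]["depths"])                              # per-stage block counts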
KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/options/003_train_vrt_videosr_bi_vimeo_7frames_220311_101042.json ADDED
@@ -0,0 +1,198 @@
+ {
+   "task": "003_train_vrt_videosr_bi_vimeo_7frames",
+   "model": "vrt",
+   "gpu_ids": [0, 1, 2, 3, 4, 5, 6, 7],
+   "dist": false,
+   "find_unused_parameters": false,
+   "use_static_graph": true,
+   "scale": 4,
+   "n_channels": 3,
+   "path": {
+     "root": "experiments",
+     "pretrained_netG": "model_zoo/vrt/002_VRT_videosr_bi_REDS_16frames.pth",
+     "pretrained_netE": null,
+     "task": "experiments/003_train_vrt_videosr_bi_vimeo_7frames",
+     "log": "experiments/003_train_vrt_videosr_bi_vimeo_7frames",
+     "options": "experiments/003_train_vrt_videosr_bi_vimeo_7frames/options",
+     "models": "experiments/003_train_vrt_videosr_bi_vimeo_7frames/models",
+     "images": "experiments/003_train_vrt_videosr_bi_vimeo_7frames/images",
+     "pretrained_optimizerG": null
+   },
+   "datasets": {
+     "train": {
+       "name": "train_dataset",
+       "dataset_type": "VideoRecurrentTrainVimeoDataset",
+       "dataroot_gt": "trainsets/vimeo90k",
+       "dataroot_lq": "trainsets/vimeo90k",
+       "meta_info_file": "data/meta_info/meta_info_Vimeo90K_train_GT.txt",
+       "io_backend": {"type": "disk"},
+       "num_frame": -1,
+       "gt_size": 256,
+       "interval_list": [1],
+       "random_reverse": true,
+       "use_hflip": true,
+       "use_rot": true,
+       "pad_sequence": true,
+       "dataloader_shuffle": true,
+       "dataloader_num_workers": 32,
+       "dataloader_batch_size": 8,
+       "phase": "train",
+       "scale": 4,
+       "n_channels": 3
+     },
+     "test": {
+       "name": "test_dataset",
+       "dataset_type": "VideoRecurrentTestDataset",
+       "dataroot_gt": "testsets/Vid4/GT",
+       "dataroot_lq": "testsets/Vid4/BIx4",
+       "cache_data": true,
+       "io_backend": {"type": "disk"},
+       "num_frame": -1,
+       "phase": "test",
+       "scale": 4,
+       "n_channels": 3
+     }
+   },
+   "netG": {
+     "net_type": "vrt",
+     "upscale": 4,
+     "img_size": [8, 64, 64],
+     "window_size": [8, 8, 8],
+     "depths": [8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4],
+     "indep_reconsts": [11, 12],
+     "embed_dims": [120, 120, 120, 120, 120, 120, 120, 180, 180, 180, 180, 180, 180],
+     "num_heads": [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
+     "spynet_path": "model_zoo/vrt/spynet_sintel_final-3d2a1287.pth",
+     "pa_frames": 4,
+     "deformable_groups": 16,
+     "nonblind_denoising": false,
+     "use_checkpoint_attn": false,
+     "use_checkpoint_ffn": false,
+     "no_checkpoint_attn_blocks": [],
+     "no_checkpoint_ffn_blocks": [],
+     "init_type": "default",
+     "scale": 4
+   },
+   "train": {
+     "G_lossfn_type": "charbonnier",
+     "G_lossfn_weight": 1.0,
+     "G_charbonnier_eps": 1e-09,
+     "E_decay": 0,
+     "G_optimizer_type": "adam",
+     "G_optimizer_lr": 0.0004,
+     "G_optimizer_betas": [0.9, 0.99],
+     "G_optimizer_wd": 0,
+     "G_optimizer_clipgrad": null,
+     "G_optimizer_reuse": true,
+     "fix_iter": 20000,
+     "fix_lr_mul": 0.125,
+     "fix_keys": ["spynet", "deform"],
+     "total_iter": 300000,
+     "G_scheduler_type": "CosineAnnealingWarmRestarts",
+     "G_scheduler_periods": 300000,
+     "G_scheduler_eta_min": 1e-07,
+     "G_regularizer_orthstep": null,
+     "G_regularizer_clipstep": null,
+     "G_param_strict": false,
+     "E_param_strict": true,
+     "checkpoint_test": 5000,
+     "checkpoint_save": 5000,
+     "checkpoint_print": 200,
+     "F_feature_layer": 34,
+     "F_weights": 1.0,
+     "F_lossfn_type": "l1",
+     "F_use_input_norm": true,
+     "F_use_range_norm": false,
+     "G_scheduler_restart_weights": 1
+   },
+   "val": {
+     "save_img": false,
+     "pad_seq": false,
+     "flip_seq": false,
+     "center_frame_only": false,
+     "num_frame_testing": 32,
+     "num_frame_overlapping": 2,
+     "size_patch_testing": 128
+   },
+   "opt_path": "options/vrt/003_train_vrt_videosr_bi_vimeo_7frames.json",
+   "is_train": true,
+   "merge_bn": false,
+   "merge_bn_startpoint": -1,
+   "num_gpu": 8,
+   "rank": 0,
+   "world_size": 1
+ }
KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/options/003_train_vrt_videosr_bi_vimeo_7frames_220311_101058.json ADDED
@@ -0,0 +1,198 @@
+ (options snapshot identical to 003_train_vrt_videosr_bi_vimeo_7frames_220311_101042.json above)
KAIR/experiments/003_train_vrt_videosr_bi_vimeo_7frames/train.log ADDED
The diff for this file is too large to render. See raw diff
 
KAIR/image_degradation.py ADDED
@@ -0,0 +1,106 @@
+ import math
+ import os
+
+ import numpy as np
+ from basicsr.data.degradations import circular_lowpass_kernel, random_mixed_kernels
+ from basicsr.utils import DiffJPEG, USMSharp
+ from numpy.typing import NDArray
+ from PIL import Image
+ from torch import Tensor
+ from torch.nn import functional as F
+
+ from data.degradations import apply_real_esrgan_degradations
+ from utils.utils_video import img2tensor
+
+
+ blur_kernel_list1 = ['iso', 'aniso', 'generalized_iso',
+                      'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+ blur_kernel_list2 = ['iso', 'aniso', 'generalized_iso',
+                      'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+ blur_kernel_prob1 = [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+ blur_kernel_prob2 = [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+ kernel_size = 21
+ blur_sigma1 = [0.05, 0.2]
+ blur_sigma2 = [0.05, 0.1]
+ betag_range1 = [0.7, 1.3]
+ betag_range2 = [0.7, 1.3]
+ betap_range1 = [0.7, 1.3]
+ betap_range2 = [0.7, 1.3]
+
+
+ def degrade_imgs(src_folder: str, dst_folder: str, degrade_scale: float, start_size: int) -> None:
+     src_img_filenames = os.listdir(src_folder)
+     jpeg_simulator = DiffJPEG()
+     usm_sharpener = USMSharp()
+     for src_img_filename in src_img_filenames:
+         src_img = Image.open(os.path.join(src_folder, src_img_filename))
+
+         src_tensor = img2tensor(np.array(src_img), bgr2rgb=False,
+                                 float32=True).unsqueeze(0) / 255.0
+         orig_h, orig_w = src_tensor.size()[2:4]
+         print("SRC TENSOR orig size: ", src_tensor.size())
+         if orig_h != start_size or orig_w != start_size:
+             src_tensor = F.interpolate(src_tensor, size=(start_size, start_size), mode='bicubic')
+             print("SRC TENSOR new size: ", src_tensor.size())
+
+         blur_kernel1, blur_kernel2, sinc_kernel = _decide_kernels()
+         (src, src_sharp, degraded_img) = apply_real_esrgan_degradations(
+             src_tensor,
+             blur_kernel1=Tensor(blur_kernel1).unsqueeze(0),
+             blur_kernel2=Tensor(blur_kernel2).unsqueeze(0),
+             second_blur_prob=0.4,
+             sinc_kernel=Tensor(sinc_kernel).unsqueeze(0),
+             resize_prob1=[0.2, 0.7, 0.1],
+             resize_prob2=[0.3, 0.4, 0.3],
+             resize_range1=[0.9, 1.1],
+             resize_range2=[0.9, 1.1],
+             gray_noise_prob1=0.2,
+             gray_noise_prob2=0.2,
+             gaussian_noise_prob1=0.2,
+             gaussian_noise_prob2=0.2,
+             noise_range=[0.01, 0.2],
+             poisson_scale_range=[0.05, 0.45],
+             jpeg_compression_range1=[85, 100],
+             jpeg_compression_range2=[85, 100],
+             jpeg_simulator=jpeg_simulator,
+             random_crop_gt_size=start_size,
+             sr_upsample_scale=1,
+             usm_sharpener=usm_sharpener
+         )
+
+         # print(src.size())
+         # print(src_sharp.size())
+         # print(degraded_img.size())
+         # print(torch.max(src))
+         # print(torch.max(src_sharp))
+         # print(torch.max(degraded_img))
+         # print(torch.min(src))
+         # print(torch.min(src_sharp))
+         # print(torch.min(degraded_img))
+         # Image.fromarray((src[0] * 255.0).permute(1, 2, 0).cpu().numpy().astype(np.uint8)).save(
+         #     "/home/cll/Desktop/TEST_IMAGE1.png")
+         # Image.fromarray((src_sharp[0] * 255.0).permute(
+         #     1, 2, 0).cpu().numpy().astype(np.uint8)).save(
+         #     "/home/cll/Desktop/TEST_IMAGE2.png")
+
+         Image.fromarray((degraded_img[0] * 255.0).permute(
+             1, 2, 0).cpu().numpy().astype(np.uint8)).save(
+                 os.path.join(dst_folder, src_img_filename))
+         print("SAVED %s: " % src_img_filename)
+
+         # Image.fromarray((src_tensor[0] * 255.0).permute(
+         #     1, 2, 0).cpu().numpy().astype(np.uint8)).save(
+         #         os.path.join(dst_folder, src_img_filename))
+         # print("SAVED %s: " % src_img_filename)
+
+
+ if __name__ == "__main__":
+     SRC_FOLDER = "/home/cll/Desktop/sr_test_GT_HQ"
+     OUTPUT_RESOLUTION_SCALE = 1
+     DST_FOLDER = "/home/cll/Desktop/sr_test_degraded_LQ_512"
+     # DST_FOLDER = "/home/cll/Desktop/sr_test_GT_512"
+     os.makedirs(DST_FOLDER, exist_ok=True)
+
+     degrade_imgs(SRC_FOLDER, DST_FOLDER, OUTPUT_RESOLUTION_SCALE, 512)
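Note that `_decide_kernels()` is called above but not defined anywhere in this file, so the script as committed would raise a NameError. Judging from the otherwise-unused imports (`math`, `circular_lowpass_kernel`, `random_mixed_kernels`) and the module-level kernel settings, a plausible implementation along the Real-ESRGAN recipe might look like the sketch below; this is an assumption, not the committed code:

    # Hypothetical helper, not part of the commit: draws the two blur kernels and
    # the final sinc kernel used by apply_real_esrgan_degradations above.
    def _decide_kernels():
        blur_kernel1 = random_mixed_kernels(
            blur_kernel_list1, blur_kernel_prob1, kernel_size,
            blur_sigma1, blur_sigma1, [-math.pi, math.pi],
            betag_range1, betap_range1, noise_range=None)
        blur_kernel2 = random_mixed_kernels(
            blur_kernel_list2, blur_kernel_prob2, kernel_size,
            blur_sigma2, blur_sigma2, [-math.pi, math.pi],
            betag_range2, betap_range2, noise_range=None)
        # Final low-pass sinc filter with a random cutoff frequency.
        omega_c = np.random.uniform(np.pi / 3, np.pi)
        sinc_kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=kernel_size)
        return blur_kernel1, blur_kernel2, sinc_kernel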
KAIR/kernels/Levin09.mat ADDED
Binary file (32.2 kB).
 
KAIR/kernels/k_large_1.png ADDED
KAIR/kernels/k_large_2.png ADDED
KAIR/kernels/kernels_12.mat ADDED
Binary file (11.2 kB).
 
KAIR/kernels/kernels_bicubicx234.mat ADDED
Binary file (7.36 kB).
 
KAIR/kernels/srmd_pca_matlab.mat ADDED
Binary file (10.9 kB).
 
KAIR/main_challenge_sr.py ADDED
@@ -0,0 +1,174 @@
+ import os.path
+ import logging
+ import time
+ from collections import OrderedDict
+ import torch
+
+ from utils import utils_logger
+ from utils import utils_image as util
+ # from utils import utils_model
+
+
+ '''
+ This code can help you to calculate:
+ `FLOPs`, `#Params`, `Runtime`, `#Activations`, `#Conv`, and `Max Memory Allocated`.
+
+ - `#Params' denotes the total number of parameters.
+ - `FLOPs' is the abbreviation for floating point operations.
+ - `#Activations' measures the number of elements of all outputs of convolutional layers.
+ - `Memory' represents maximum GPU memory consumption according to the PyTorch function torch.cuda.max_memory_allocated().
+ - `#Conv' represents the number of convolutional layers.
+ - `FLOPs', `#Activations', and `Memory' are tested on an LR image of size 256x256.
+
+ For more information, please refer to the ECCVW paper "AIM 2020 Challenge on Efficient Super-Resolution: Methods and Results".
+
+ # If you use this code, please consider the following citations:
+
+ @inproceedings{zhang2020aim,
+   title={AIM 2020 Challenge on Efficient Super-Resolution: Methods and Results},
+   author={Kai Zhang and Martin Danelljan and Yawei Li and Radu Timofte and others},
+   booktitle={European Conference on Computer Vision Workshops},
+   year={2020}
+ }
+ @inproceedings{zhang2019aim,
+   title={AIM 2019 Challenge on Constrained Super-Resolution: Methods and Results},
+   author={Kai Zhang and Shuhang Gu and Radu Timofte and others},
+   booktitle={IEEE International Conference on Computer Vision Workshops},
+   year={2019}
+ }
+
+ CuDNN (https://developer.nvidia.com/rdp/cudnn-archive) should be installed.
+
+ For `Memory` and `Runtime`, set 'print_modelsummary = False' and 'save_results = False'.
+ '''
+
+
+ def main():
+
+     utils_logger.logger_info('efficientsr_challenge', log_path='efficientsr_challenge.log')
+     logger = logging.getLogger('efficientsr_challenge')
+
+     # print(torch.__version__)               # pytorch version
+     # print(torch.version.cuda)              # cuda version
+     # print(torch.backends.cudnn.version())  # cudnn version
+
+     # --------------------------------
+     # basic settings
+     # --------------------------------
+     model_names = ['msrresnet', 'imdn']
+     model_id = 1                   # set the model name
+     sf = 4
+     model_name = model_names[model_id]
+     logger.info('{:>16s} : {:s}'.format('Model Name', model_name))
+
+     testsets = 'testsets'          # set path of testsets
+     testset_L = 'DIV2K_valid_LR'   # set current testing dataset; 'DIV2K_test_LR'
+     testset_L = 'set12'
+
+     save_results = True
+     print_modelsummary = True      # set False when calculating `Max Memory` and `Runtime`
+
+     torch.cuda.set_device(0)       # set GPU ID
+     logger.info('{:>16s} : {:<d}'.format('GPU ID', torch.cuda.current_device()))
+     torch.cuda.empty_cache()
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+     # --------------------------------
+     # define network and load model
+     # --------------------------------
+     if model_name == 'msrresnet':
+         from models.network_msrresnet import MSRResNet1 as net
+         model = net(in_nc=3, out_nc=3, nc=64, nb=16, upscale=4)  # define network
+         model_path = os.path.join('model_zoo', 'msrresnet_x4_psnr.pth')  # set model path
+     elif model_name == 'imdn':
+         from models.network_imdn import IMDN as net
+         model = net(in_nc=3, out_nc=3, nc=64, nb=8, upscale=4, act_mode='L', upsample_mode='pixelshuffle')  # define network
+         model_path = os.path.join('model_zoo', 'imdn_x4.pth')  # set model path
+
+     model.load_state_dict(torch.load(model_path), strict=True)
+     model.eval()
+     for k, v in model.named_parameters():
+         v.requires_grad = False
+     model = model.to(device)
+
+     # --------------------------------
+     # print model summary
+     # --------------------------------
+     if print_modelsummary:
+         from utils.utils_modelsummary import get_model_activation, get_model_flops
+         input_dim = (3, 256, 256)  # set the input dimension
+
+         activations, num_conv2d = get_model_activation(model, input_dim)
+         logger.info('{:>16s} : {:<.4f} [M]'.format('#Activations', activations/10**6))
+         logger.info('{:>16s} : {:<d}'.format('#Conv2d', num_conv2d))
+
+         flops = get_model_flops(model, input_dim, False)
+         logger.info('{:>16s} : {:<.4f} [G]'.format('FLOPs', flops/10**9))
+
+         num_parameters = sum(map(lambda x: x.numel(), model.parameters()))
+         logger.info('{:>16s} : {:<.4f} [M]'.format('#Params', num_parameters/10**6))
+
+     # --------------------------------
+     # read image
+     # --------------------------------
+     L_path = os.path.join(testsets, testset_L)
+     E_path = os.path.join(testsets, testset_L+'_'+model_name)
+     util.mkdir(E_path)
+
+     # record runtime
+     test_results = OrderedDict()
+     test_results['runtime'] = []
+
+     logger.info('{:>16s} : {:s}'.format('Input Path', L_path))
+     logger.info('{:>16s} : {:s}'.format('Output Path', E_path))
+     idx = 0
+
+     start = torch.cuda.Event(enable_timing=True)
+     end = torch.cuda.Event(enable_timing=True)
+
+     for img in util.get_image_paths(L_path):
+
+         # --------------------------------
+         # (1) img_L
+         # --------------------------------
+         idx += 1
+         img_name, ext = os.path.splitext(os.path.basename(img))
+         logger.info('{:->4d}--> {:>10s}'.format(idx, img_name+ext))
+
+         img_L = util.imread_uint(img, n_channels=3)
+         img_L = util.uint2tensor4(img_L)
+         torch.cuda.empty_cache()
+         img_L = img_L.to(device)
+
+         start.record()
+         img_E = model(img_L)
+         # img_E = utils_model.test_mode(model, img_L, mode=2, min_size=480, sf=sf)  # use this to avoid 'out of memory' issue.
+         # logger.info('{:>16s} : {:<.3f} [M]'.format('Max Memory', torch.cuda.max_memory_allocated(torch.cuda.current_device())/1024**2))  # Memory
+         end.record()
+         torch.cuda.synchronize()
+         test_results['runtime'].append(start.elapsed_time(end))  # milliseconds
+
+         # torch.cuda.synchronize()
+         # start = time.time()
+         # img_E = model(img_L)
+         # torch.cuda.synchronize()
+         # end = time.time()
+         # test_results['runtime'].append(end-start)  # seconds
+
+         # --------------------------------
+         # (2) img_E
+         # --------------------------------
+         img_E = util.tensor2uint(img_E)
+
+         if save_results:
+             util.imsave(img_E, os.path.join(E_path, img_name+ext))
+     ave_runtime = sum(test_results['runtime']) / len(test_results['runtime']) / 1000.0
+     logger.info('------> Average runtime of ({}) is : {:.6f} seconds'.format(L_path, ave_runtime))
+
+
+ if __name__ == '__main__':
+
+     main()
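The runtime numbers above come from CUDA events rather than wall-clock time, which avoids counting host-side overhead and requires an explicit synchronize before reading the result. The same pattern, shown in isolation (the forward call is a placeholder for whatever work is being timed):

    import torch

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    start.record()
    # ... run the model forward pass here ...
    end.record()
    torch.cuda.synchronize()          # wait for the GPU to finish before reading
    elapsed_ms = start.elapsed_time(end)  # GPU time in milliseconds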
KAIR/main_download_pretrained_models.py ADDED
@@ -0,0 +1,141 @@
+ import argparse
+ import os
+ import requests
+ import re
+
+
+ """
+ How to use:
+ download all the models:
+     python main_download_pretrained_models.py --models "all" --model_dir "model_zoo"
+
+ download DnCNN models:
+     python main_download_pretrained_models.py --models "DnCNN" --model_dir "model_zoo"
+
+ download SRMD models:
+     python main_download_pretrained_models.py --models "SRMD" --model_dir "model_zoo"
+
+ download BSRGAN models:
+     python main_download_pretrained_models.py --models "BSRGAN" --model_dir "model_zoo"
+
+ download FFDNet models:
+     python main_download_pretrained_models.py --models "FFDNet" --model_dir "model_zoo"
+
+ download DPSR models:
+     python main_download_pretrained_models.py --models "DPSR" --model_dir "model_zoo"
+
+ download SwinIR models:
+     python main_download_pretrained_models.py --models "SwinIR" --model_dir "model_zoo"
+
+ download VRT models:
+     python main_download_pretrained_models.py --models "VRT" --model_dir "model_zoo"
+
+ download other models:
+     python main_download_pretrained_models.py --models "others" --model_dir "model_zoo"
+
+ ------------------------------------------------------------------
+
+ download 'dncnn_15.pth' and 'dncnn_50.pth'
+     python main_download_pretrained_models.py --models "dncnn_15.pth dncnn_50.pth" --model_dir "model_zoo"
+
+ ------------------------------------------------------------------
+
+ download DnCNN models and 'BSRGAN.pth'
+     python main_download_pretrained_models.py --models "DnCNN BSRGAN.pth" --model_dir "model_zoo"
+
+ """
+
+
+ def download_pretrained_model(model_dir='model_zoo', model_name='dncnn3.pth'):
+     if os.path.exists(os.path.join(model_dir, model_name)):
+         print(f'already exists, skip downloading [{model_name}]')
+     else:
+         os.makedirs(model_dir, exist_ok=True)
+         if 'SwinIR' in model_name:
+             url = 'https://github.com/JingyunLiang/SwinIR/releases/download/v0.0/{}'.format(model_name)
+         elif 'VRT' in model_name:
+             url = 'https://github.com/JingyunLiang/VRT/releases/download/v0.0/{}'.format(model_name)
+         else:
+             url = 'https://github.com/cszn/KAIR/releases/download/v1.0/{}'.format(model_name)
+         r = requests.get(url, allow_redirects=True)
+         print(f'downloading [{model_dir}/{model_name}] ...')
+         open(os.path.join(model_dir, model_name), 'wb').write(r.content)
+         print('done!')
+
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--models',
+                         type=lambda s: re.split(' |, ', s),
+                         default="dncnn3.pth",
+                         help='comma or space delimited list of model names, e.g., "DnCNN", "DnCNN BSRGAN.pth", "dncnn_15.pth dncnn_50.pth"')
+     parser.add_argument('--model_dir', type=str, default='model_zoo', help='path of model_zoo')
+     args = parser.parse_args()
+
+     print(f'trying to download {args.models}')
+
+     method_model_zoo = {'DnCNN': ['dncnn_15.pth', 'dncnn_25.pth', 'dncnn_50.pth', 'dncnn3.pth', 'dncnn_color_blind.pth', 'dncnn_gray_blind.pth'],
+                         'SRMD': ['srmdnf_x2.pth', 'srmdnf_x3.pth', 'srmdnf_x4.pth', 'srmd_x2.pth', 'srmd_x3.pth', 'srmd_x4.pth'],
+                         'DPSR': ['dpsr_x2.pth', 'dpsr_x3.pth', 'dpsr_x4.pth', 'dpsr_x4_gan.pth'],
+                         'FFDNet': ['ffdnet_color.pth', 'ffdnet_gray.pth', 'ffdnet_color_clip.pth', 'ffdnet_gray_clip.pth'],
+                         'USRNet': ['usrgan.pth', 'usrgan_tiny.pth', 'usrnet.pth', 'usrnet_tiny.pth'],
+                         'DPIR': ['drunet_gray.pth', 'drunet_color.pth', 'drunet_deblocking_color.pth', 'drunet_deblocking_grayscale.pth'],
+                         'BSRGAN': ['BSRGAN.pth', 'BSRNet.pth', 'BSRGANx2.pth'],
+                         'IRCNN': ['ircnn_color.pth', 'ircnn_gray.pth'],
+                         'SwinIR': ['001_classicalSR_DF2K_s64w8_SwinIR-M_x2.pth', '001_classicalSR_DF2K_s64w8_SwinIR-M_x3.pth',
+                                    '001_classicalSR_DF2K_s64w8_SwinIR-M_x4.pth', '001_classicalSR_DF2K_s64w8_SwinIR-M_x8.pth',
+                                    '001_classicalSR_DIV2K_s48w8_SwinIR-M_x2.pth', '001_classicalSR_DIV2K_s48w8_SwinIR-M_x3.pth',
+                                    '001_classicalSR_DIV2K_s48w8_SwinIR-M_x4.pth', '001_classicalSR_DIV2K_s48w8_SwinIR-M_x8.pth',
+                                    '002_lightweightSR_DIV2K_s64w8_SwinIR-S_x2.pth', '002_lightweightSR_DIV2K_s64w8_SwinIR-S_x3.pth',
+                                    '002_lightweightSR_DIV2K_s64w8_SwinIR-S_x4.pth', '003_realSR_BSRGAN_DFO_s64w8_SwinIR-M_x4_GAN.pth',
+                                    '003_realSR_BSRGAN_DFO_s64w8_SwinIR-M_x4_PSNR.pth', '004_grayDN_DFWB_s128w8_SwinIR-M_noise15.pth',
+                                    '004_grayDN_DFWB_s128w8_SwinIR-M_noise25.pth', '004_grayDN_DFWB_s128w8_SwinIR-M_noise50.pth',
+                                    '005_colorDN_DFWB_s128w8_SwinIR-M_noise15.pth', '005_colorDN_DFWB_s128w8_SwinIR-M_noise25.pth',
+                                    '005_colorDN_DFWB_s128w8_SwinIR-M_noise50.pth', '006_CAR_DFWB_s126w7_SwinIR-M_jpeg10.pth',
+                                    '006_CAR_DFWB_s126w7_SwinIR-M_jpeg20.pth', '006_CAR_DFWB_s126w7_SwinIR-M_jpeg30.pth',
+                                    '006_CAR_DFWB_s126w7_SwinIR-M_jpeg40.pth'],
+                         'VRT': ['001_VRT_videosr_bi_REDS_6frames.pth', '002_VRT_videosr_bi_REDS_16frames.pth',
+                                 '003_VRT_videosr_bi_Vimeo_7frames.pth', '004_VRT_videosr_bd_Vimeo_7frames.pth',
+                                 '005_VRT_videodeblurring_DVD.pth', '006_VRT_videodeblurring_GoPro.pth',
+                                 '007_VRT_videodeblurring_REDS.pth', '008_VRT_videodenoising_DAVIS.pth'],
+                         'others': ['msrresnet_x4_psnr.pth', 'msrresnet_x4_gan.pth', 'imdn_x4.pth', 'RRDB.pth', 'ESRGAN.pth',
+                                    'FSSR_DPED.pth', 'FSSR_JPEG.pth', 'RealSR_DPED.pth', 'RealSR_JPEG.pth']
+                         }
+
+     method_zoo = list(method_model_zoo.keys())
+     model_zoo = []
+     for b in list(method_model_zoo.values()):
+         model_zoo += b
+
+     if 'all' in args.models:
+         for method in method_zoo:
+             for model_name in method_model_zoo[method]:
+                 download_pretrained_model(args.model_dir, model_name)
+     else:
+         for method_model in args.models:
+             if method_model in method_zoo:  # method, need for loop
+                 for model_name in method_model_zoo[method_model]:
+                     if 'SwinIR' in model_name:
+                         download_pretrained_model(os.path.join(args.model_dir, 'swinir'), model_name)
+                     elif 'VRT' in model_name:
+                         download_pretrained_model(os.path.join(args.model_dir, 'vrt'), model_name)
+                     else:
+                         download_pretrained_model(args.model_dir, model_name)
+             elif method_model in model_zoo:  # model, do not need for loop
+                 if 'SwinIR' in method_model:
+                     download_pretrained_model(os.path.join(args.model_dir, 'swinir'), method_model)
+                 elif 'VRT' in method_model:
+                     download_pretrained_model(os.path.join(args.model_dir, 'vrt'), method_model)
+                 else:
+                     download_pretrained_model(args.model_dir, method_model)
+             else:
+                 print(f'Do not find {method_model} from the pre-trained model zoo!')
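Besides the command-line usage shown in the docstring, download_pretrained_model can be called directly from Python. A small sketch, assuming the repo root is on the import path, fetching the VRT checkpoint that the training options above use as pretrained_netG:

    from main_download_pretrained_models import download_pretrained_model

    # Saves model_zoo/vrt/002_VRT_videosr_bi_REDS_16frames.pth, skipping the
    # download if the file is already present.
    download_pretrained_model(model_dir='model_zoo/vrt',
                              model_name='002_VRT_videosr_bi_REDS_16frames.pth')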
KAIR/main_test_dncnn.py ADDED
@@ -0,0 +1,203 @@
+ import os.path
+ import logging
+ import argparse
+
+ import numpy as np
+ from datetime import datetime
+ from collections import OrderedDict
+ # from scipy.io import loadmat
+
+ import torch
+
+ from utils import utils_logger
+ from utils import utils_model
+ from utils import utils_image as util
+
+
+ '''
+ Spyder (Python 3.6)
+ PyTorch 1.1.0
+ Windows 10 or Linux
+
+ Kai Zhang (cskaizhang@gmail.com)
+ github: https://github.com/cszn/KAIR
+         https://github.com/cszn/DnCNN
+
+ @article{zhang2017beyond,
+   title={Beyond a gaussian denoiser: Residual learning of deep cnn for image denoising},
+   author={Zhang, Kai and Zuo, Wangmeng and Chen, Yunjin and Meng, Deyu and Zhang, Lei},
+   journal={IEEE Transactions on Image Processing},
+   volume={26},
+   number={7},
+   pages={3142--3155},
+   year={2017},
+   publisher={IEEE}
+ }
+
+ % If you have any question, please feel free to contact me.
+ % Kai Zhang (e-mail: cskaizhang@gmail.com; github: https://github.com/cszn)
+
+ by Kai Zhang (12/Dec./2019)
+ '''
+
+ """
+ # --------------------------------------------
+ |--model_zoo               # model_zoo
+    |--dncnn_15             # model_name
+    |--dncnn_25
+    |--dncnn_50
+    |--dncnn_gray_blind
+    |--dncnn_color_blind
+    |--dncnn3
+ |--testset                 # testsets
+    |--set12                # testset_name
+    |--bsd68
+    |--cbsd68
+ |--results                 # results
+    |--set12_dncnn_15       # result_name = testset_name + '_' + model_name
+    |--set12_dncnn_25
+    |--bsd68_dncnn_15
+ # --------------------------------------------
+ """
+
+
+ def main():
+
+     # ----------------------------------------
+     # Preparation
+     # ----------------------------------------
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--model_name', type=str, default='dncnn_25', help='dncnn_15, dncnn_25, dncnn_50, dncnn_gray_blind, dncnn_color_blind, dncnn3')
+     parser.add_argument('--testset_name', type=str, default='set12', help='test set, bsd68 | set12')
+     parser.add_argument('--noise_level_img', type=int, default=15, help='noise level: 15, 25, 50')
+     parser.add_argument('--x8', type=bool, default=False, help='x8 to boost performance')
+     parser.add_argument('--show_img', type=bool, default=False, help='show the image')
+     parser.add_argument('--model_pool', type=str, default='model_zoo', help='path of model_zoo')
+     parser.add_argument('--testsets', type=str, default='testsets', help='path of testing folder')
+     parser.add_argument('--results', type=str, default='results', help='path of results')
+     parser.add_argument('--need_degradation', type=bool, default=True, help='add noise or not')
+     parser.add_argument('--task_current', type=str, default='dn', help='dn for denoising, fixed!')
+     parser.add_argument('--sf', type=int, default=1, help='unused for denoising')
+     args = parser.parse_args()
+
+     if 'color' in args.model_name:
+         n_channels = 3  # fixed, 1 for grayscale image, 3 for color image
+     else:
+         n_channels = 1  # fixed for grayscale image
+     if args.model_name in ['dncnn_gray_blind', 'dncnn_color_blind', 'dncnn3']:
+         nb = 20  # fixed
+     else:
+         nb = 17  # fixed
+
+     result_name = args.testset_name + '_' + args.model_name  # fixed
+     border = args.sf if args.task_current == 'sr' else 0     # shave border to calculate PSNR and SSIM
+     model_path = os.path.join(args.model_pool, args.model_name+'.pth')
+
+     # ----------------------------------------
+     # L_path, E_path, H_path
+     # ----------------------------------------
+
+     L_path = os.path.join(args.testsets, args.testset_name)  # L_path, for Low-quality images
+     H_path = L_path                                          # H_path, for High-quality images
+     E_path = os.path.join(args.results, result_name)         # E_path, for Estimated images
+     util.mkdir(E_path)
+
+     if H_path == L_path:
+         args.need_degradation = True
+     logger_name = result_name
+     utils_logger.logger_info(logger_name, log_path=os.path.join(E_path, logger_name+'.log'))
+     logger = logging.getLogger(logger_name)
+
+     need_H = True if H_path is not None else False
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+     # ----------------------------------------
+     # load model
+     # ----------------------------------------
+
+     from models.network_dncnn import DnCNN as net
+     model = net(in_nc=n_channels, out_nc=n_channels, nc=64, nb=nb, act_mode='R')
+     # model = net(in_nc=n_channels, out_nc=n_channels, nc=64, nb=nb, act_mode='BR')  # use this if BN is not merged by utils_bnorm.merge_bn(model)
+     model.load_state_dict(torch.load(model_path), strict=True)
+     model.eval()
+     for k, v in model.named_parameters():
+         v.requires_grad = False
+     model = model.to(device)
+     logger.info('Model path: {:s}'.format(model_path))
+     number_parameters = sum(map(lambda x: x.numel(), model.parameters()))
+     logger.info('Params number: {}'.format(number_parameters))
+
+     test_results = OrderedDict()
+     test_results['psnr'] = []
+     test_results['ssim'] = []
+
+     logger.info('model_name:{}, image sigma:{}'.format(args.model_name, args.noise_level_img))
+     logger.info(L_path)
+     L_paths = util.get_image_paths(L_path)
+     H_paths = util.get_image_paths(H_path) if need_H else None
+
+     for idx, img in enumerate(L_paths):
+
+         # ------------------------------------
+         # (1) img_L
+         # ------------------------------------
+
+         img_name, ext = os.path.splitext(os.path.basename(img))
+         # logger.info('{:->4d}--> {:>10s}'.format(idx+1, img_name+ext))
+         img_L = util.imread_uint(img, n_channels=n_channels)
+         img_L = util.uint2single(img_L)
+
+         if args.need_degradation:  # degradation process
+             np.random.seed(seed=0)  # for reproducibility
+             img_L += np.random.normal(0, args.noise_level_img/255., img_L.shape)
+
+         util.imshow(util.single2uint(img_L), title='Noisy image with noise level {}'.format(args.noise_level_img)) if args.show_img else None
+
+         img_L = util.single2tensor4(img_L)
+         img_L = img_L.to(device)
+
+         # ------------------------------------
+         # (2) img_E
+         # ------------------------------------
+
+         if not args.x8:
+             img_E = model(img_L)
+         else:
+             img_E = utils_model.test_mode(model, img_L, mode=3)
+
+         img_E = util.tensor2uint(img_E)
+
+         if need_H:
+
+             # --------------------------------
+             # (3) img_H
+             # --------------------------------
+
+             img_H = util.imread_uint(H_paths[idx], n_channels=n_channels)
+             img_H = img_H.squeeze()
+
+             # --------------------------------
+             # PSNR and SSIM
+             # --------------------------------
+
+             psnr = util.calculate_psnr(img_E, img_H, border=border)
+             ssim = util.calculate_ssim(img_E, img_H, border=border)
+             test_results['psnr'].append(psnr)
+             test_results['ssim'].append(ssim)
+             logger.info('{:s} - PSNR: {:.2f} dB; SSIM: {:.4f}.'.format(img_name+ext, psnr, ssim))
+             util.imshow(np.concatenate([img_E, img_H], axis=1), title='Recovered / Ground-truth') if args.show_img else None
+
+         # ------------------------------------
+         # save results
+         # ------------------------------------
+
+         util.imsave(img_E, os.path.join(E_path, img_name+ext))
+
+     if need_H:
+         ave_psnr = sum(test_results['psnr']) / len(test_results['psnr'])
+         ave_ssim = sum(test_results['ssim']) / len(test_results['ssim'])
+         logger.info('Average PSNR/SSIM(RGB) - {} - PSNR: {:.2f} dB; SSIM: {:.4f}'.format(result_name, ave_psnr, ave_ssim))
+
+
+ if __name__ == '__main__':
+
+     main()
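A typical invocation matching the arguments above (assuming testsets/set12 and model_zoo/dncnn_25.pth are in place):

    python main_test_dncnn.py --model_name dncnn_25 --testset_name set12 --noise_level_img 25

Denoised images and a per-image PSNR/SSIM log are then written to results/set12_dncnn_25.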