aravindhv10 committed on
Commit 0be46a0 · 1 Parent(s): 312ca62

Routine updates
.gitattributes CHANGED
@@ -40,3 +40,10 @@ checkpoints/Model_80.pth filter=lfs diff=lfs merge=lfs -text
  checkpoints/AEMatter/AEM_RWA.ckpt filter=lfs diff=lfs merge=lfs -text
  checkpoints/StableDiffusion/90c7c97574f8db765509b6a5d2e7b2551b430a10cac03e37d368654eac5e8169cd149644d188be4b5b2f1b9f29e66b64a02535f622f2bf284c319b076224cb2b filter=lfs diff=lfs merge=lfs -text
  checkpoints/StableDiffusion/b970812225cfb95427c13e73b75eef66430e2a525876dddac494d70fe4ed0524cb197043e0ac3dc3026b32a45cd1d6d126ec2fe74a5bc3ef5df21836ca022b30 filter=lfs diff=lfs merge=lfs -text
+ checkpoints/MVANet/skin.pth filter=lfs diff=lfs merge=lfs -text
+ checkpoints/MVANet/garment.pth filter=lfs diff=lfs merge=lfs -text
+ demo/demo_lip.png filter=lfs diff=lfs merge=lfs -text
+ demo/lip-visualization.jpg filter=lfs diff=lfs merge=lfs -text
+ demo/demo_pascal.png filter=lfs diff=lfs merge=lfs -text
+ demo/demo_atr.png filter=lfs diff=lfs merge=lfs -text
+ demo/demo.jpg filter=lfs diff=lfs merge=lfs -text
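For reference, attribute lines of this form are what `git lfs track` writes into `.gitattributes`. A minimal sketch (illustrative only, not part of this commit) of how the new checkpoint and demo paths could be registered:

#+begin_src sh
#!/bin/sh
# Each invocation appends a "<path> filter=lfs diff=lfs merge=lfs -text" line
# to .gitattributes, matching the additions shown above.
git lfs track 'checkpoints/MVANet/skin.pth'
git lfs track 'checkpoints/MVANet/garment.pth'
git lfs track 'demo/demo.jpg'

# Stage the updated attributes together with the newly tracked files.
git add .gitattributes checkpoints/MVANet/skin.pth checkpoints/MVANet/garment.pth demo/demo.jpg
#+end_src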
.gitignore CHANGED
@@ -1,28 +1,30 @@
- /ComfyUI_MVANet/download.sh
+ /ComfyUI_MVANet/__pycache__/__init__.cpython-310.pyc
+ /ComfyUI_MVANet/#README.org#
+ /ComfyUI_MVANet/.#README.org
+ /ComfyUI_MVANet/README.org~
+ /ComfyUI_MVANet/.README.org.~undo-tree~
+ /#main.org#
+ /.#main.org
+ /main.org~
+ /.main.org.~undo-tree~
+ /.README.md.~undo-tree~
+ /ComfyUI_MVANet/.#README.org
+ /ComfyUI_AEMatter/__pycache__/__init__.cpython-310.pyc
+ /ComfyUI_AEMatter/AEMatter.class.py
+ /ComfyUI_AEMatter/AEMatter.execute.py
+ /ComfyUI_AEMatter/AEMatter.function.py
+ /ComfyUI_AEMatter/AEMatter.import.py
  /ComfyUI_MVANet/MVANet_inference.class.py
  /ComfyUI_MVANet/MVANet_inference.execute.py
  /ComfyUI_MVANet/MVANet_inference.function.py
  /ComfyUI_MVANet/MVANet_inference.import.py
- /ComfyUI_MVANet/MVANet_inference.run.sh
  /ComfyUI_MVANet/MVANet_inference.unify.sh
- /ComfyUI_MVANet/.#README.org
- data/
- demo/demo_atr.png
- demo/demo.jpg
- demo/demo_lip.png
- demo/demo_pascal.png
- demo/lip-visualization.jpg
+ /ComfyUI_AEMatter/AEMatter.unify.sh
  /git_add.txt
+ /git_lfs_track.txt
+ /gitignore.txt
+ /rm.txt
+ /work.sh
  log/
- /main.org
  pretrain_model/
- **/__pycache__
- /rm.txt
- /waste.txt
- ComfyUI_AEMatter/AEMatter.execute.py
- ComfyUI_AEMatter/__pycache__/__init__.cpython-310.pyc
- ComfyUI_AEMatter/AEMatter.run.sh
- ComfyUI_AEMatter/AEMatter.class.py
- ComfyUI_AEMatter/AEMatter.import.py
- ComfyUI_AEMatter/AEMatter.function.py
- ComfyUI_AEMatter/AEMatter.unify.sh
+ commit_and_push.sh
ComfyUI_AEMatter/AEMatter.run.sh ADDED
@@ -0,0 +1,3 @@
+ #!/bin/sh
+ . "${HOME}/dbnew.sh"
+ python3 './AEMatter.py'
ComfyUI_MVANet/MVANet_inference.run.sh ADDED
@@ -0,0 +1,3 @@
+ #!/bin/sh
+ . "${HOME}/dbnew.sh"
+ python3 './MVANet_inference.py'
ComfyUI_MVANet/download.sh ADDED
@@ -0,0 +1,13 @@
+ #!/bin/sh
+ get_repo(){
+     DIR_REPO="${HOME}/GITHUB/$('echo' "${1}" | 'sed' 's/^git@github.com://g ; s@^https://github.com/@@g ; s@.git$@@g' )"
+     DIR_BASE="$('dirname' '--' "${DIR_REPO}")"
+     mkdir -pv -- "${DIR_BASE}"
+     cd "${DIR_BASE}"
+     git clone "${1}"
+     cd "${DIR_REPO}"
+     git pull
+     git submodule update --recursive --init
+ }
+
+ get_repo 'https://github.com/qianyu-dlut/MVANet.git'
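To make the path mapping in get_repo concrete: the sed pipeline strips a leading 'git@github.com:' or 'https://github.com/' and a trailing '.git', so the clone lands under "${HOME}/GITHUB/<owner>/<repo>". A small sketch (illustrative only, reusing the URL from the script above):

#+begin_src sh
#!/bin/sh
# The MVANet URL used above resolves to "${HOME}/GITHUB/qianyu-dlut/MVANet".
URL='https://github.com/qianyu-dlut/MVANet.git'
echo "${URL}" | sed 's/^git@github.com://g ; s@^https://github.com/@@g ; s@.git$@@g'
# prints: qianyu-dlut/MVANet
#+end_src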
checkpoints/MVANet/garment.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7604ed46e06fbcff3b8f38c8934d253617171d02aecdd028f0f01086d9344893
+ size 380785263
checkpoints/MVANet/skin.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c71afcdd9cb1be73e43d84f5ffc2ae12b4964cc13c8460fc0adb6d52a0603cd4
+ size 380782803
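The two .pth entries above are standard Git LFS pointer stubs: the repository stores only the sha256 oid and byte size, while the actual weights live in LFS storage. A minimal sketch (illustrative only, not part of the commit) using stock git-lfs commands:

#+begin_src sh
#!/bin/sh
# Print the pointer (version/oid/size) that would be committed for a local file.
git lfs pointer --file='checkpoints/MVANet/skin.pth'

# After cloning, download and materialise the real checkpoint payloads.
git lfs pull --include='checkpoints/MVANet/*'
#+end_src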
demo/demo.jpg ADDED

Git LFS Details

  • SHA256: 6871c209cc202232323f309bbdec6ef9c2834aedaa3aef3f50293c4e783f0fec
  • Pointer size: 131 Bytes
  • Size of remote file: 310 kB
demo/demo_atr.png ADDED

Git LFS Details

  • SHA256: f59b23d397fb6438c34c7dee1d3b076fa50b1fb46f2d37c552dcdb711a163026
  • Pointer size: 130 Bytes
  • Size of remote file: 15.7 kB
demo/demo_lip.png ADDED

Git LFS Details

  • SHA256: d3215fdaa9a3e3044fe386a9706db3e9a1b0db8d95d340f8d832b7079da35b03
  • Pointer size: 130 Bytes
  • Size of remote file: 15.2 kB
demo/demo_pascal.png ADDED

Git LFS Details

  • SHA256: 1b8341c054c1e4c560e017c718e22e635392917f3e8c09ecffd003cc5f0258c0
  • Pointer size: 130 Bytes
  • Size of remote file: 15.1 kB
demo/lip-visualization.jpg ADDED

Git LFS Details

  • SHA256: d311b9ac4871d4e05a6b29953b13d6431afb269514571992267ef7038953bf1d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.56 MB
main.org ADDED
@@ -0,0 +1,680 @@
+ * COMMENT WORK SPACE
+ cd $HOME/HUGGINGFACE/aravindhv10/Self-Correction-Human-Parsing
+
+ ** ELISP
+ #+begin_src elisp
+ (save-buffer)
+ (org-babel-tangle)
+ (shell-command "./work.sh")
+ #+end_src
+
+ #+RESULTS:
+ : 0
+
+ ** ELISP
+ #+begin_src elisp
+ (shell-command "./commit_and_push.sh")
+ #+end_src
+
+ ** SHELL
+ #+begin_src sh :shebang #!/bin/sh :results output
+ git status
+ #+end_src
+
+ #+RESULTS:
+ #+begin_example
+ On branch main
+ Your branch is up to date with 'origin/main'.
+
+ Changes to be committed:
+   (use "git restore --staged <file>..." to unstage)
+   modified: .gitattributes
+   modified: .gitignore
+   new file: ComfyUI_AEMatter/AEMatter.run.sh
+   new file: ComfyUI_MVANet/MVANet_inference.run.sh
+   new file: ComfyUI_MVANet/download.sh
+   new file: checkpoints/MVANet/garment.pth
+   new file: checkpoints/MVANet/skin.pth
+   new file: demo/demo.jpg
+   new file: demo/demo_atr.png
+   new file: demo/demo_lip.png
+   new file: demo/demo_pascal.png
+   new file: demo/lip-visualization.jpg
+   new file: main.org
+   new file: training_code/MVANet/README.org
+
+ #+end_example
+
+ * Commit and push
+ #+begin_src sh :shebang #!/bin/sh :results output :tangle ./commit_and_push.sh
+ git commit -m 'Routine updates'
+ git push
+ #+end_src
+
+ * List of large files
+ #+begin_src conf :tangle ./git_lfs_track.txt
+ checkpoints/AEMatter/AEM_RWA.ckpt
+ checkpoints/atr.pth
+ checkpoints/lip.pth
+ checkpoints/Model_80.pth
+ checkpoints/MVANet/garment.pth
+ checkpoints/MVANet/skin.pth
+ checkpoints/pascal.pth
+ checkpoints/StableDiffusion/90c7c97574f8db765509b6a5d2e7b2551b430a10cac03e37d368654eac5e8169cd149644d188be4b5b2f1b9f29e66b64a02535f622f2bf284c319b076224cb2b
+ checkpoints/StableDiffusion/b970812225cfb95427c13e73b75eef66430e2a525876dddac494d70fe4ed0524cb197043e0ac3dc3026b32a45cd1d6d126ec2fe74a5bc3ef5df21836ca022b30
+ demo/demo_atr.png
+ demo/demo.jpg
+ demo/demo_lip.png
+ demo/demo_pascal.png
+ demo/lip-visualization.jpg
+ #+end_src
+
72
+ * List of source files to add
73
+ #+begin_src conf :tangle ./git_add.txt
74
+ checkpoints/StableDiffusion/hash
75
+ ComfyUI_AEMatter/AEMatter.py
76
+ ComfyUI_AEMatter/AEMatter.run.sh
77
+ ComfyUI_AEMatter/__init__.py
78
+ ComfyUI_AEMatter/README.org
79
+ ComfyUI_MVANet/download.sh
80
+ ComfyUI_MVANet/__init__.py
81
+ ComfyUI_MVANet/MVANet_inference.py
82
+ ComfyUI_MVANet/MVANet_inference.run.sh
83
+ ComfyUI_MVANet/README.org
84
+ ComfyUI_MVANet/requirements.txt
85
+ datasets/datasets.py
86
+ datasets/__init__.py
87
+ datasets/simple_extractor_dataset.py
88
+ datasets/target_generation.py
89
+ environment.yaml
90
+ evaluate.py
91
+ .gitattributes
92
+ .gitignore
93
+ LICENSE
94
+ main.org
95
+ mhp_extension/coco_style_annotation_creator/human_to_coco.py
96
+ mhp_extension/coco_style_annotation_creator/pycococreatortools.py
97
+ mhp_extension/coco_style_annotation_creator/test_human2coco_format.py
98
+ mhp_extension/demo.ipynb
99
+ mhp_extension/detectron2/.circleci/config.yml
100
+ mhp_extension/detectron2/.clang-format
101
+ mhp_extension/detectron2/configs/Base-RCNN-C4.yaml
102
+ mhp_extension/detectron2/configs/Base-RCNN-DilatedC5.yaml
103
+ mhp_extension/detectron2/configs/Base-RCNN-FPN.yaml
104
+ mhp_extension/detectron2/configs/Base-RetinaNet.yaml
105
+ mhp_extension/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
106
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml
107
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml
108
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml
109
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml
110
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml
111
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml
112
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml
113
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml
114
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml
115
+ mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml
116
+ mhp_extension/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml
117
+ mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml
118
+ mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml
119
+ mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml
120
+ mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml
121
+ mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml
122
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml
123
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml
124
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
125
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml
126
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml
127
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml
128
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml
129
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
130
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
131
+ mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
132
+ mhp_extension/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml
133
+ mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml
134
+ mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml
135
+ mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml
136
+ mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
137
+ mhp_extension/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml
138
+ mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
139
+ mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml
140
+ mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml
141
+ mhp_extension/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
142
+ mhp_extension/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
143
+ mhp_extension/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
144
+ mhp_extension/detectron2/configs/Detectron1-Comparisons/README.md
145
+ mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
146
+ mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
147
+ mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
148
+ mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
149
+ mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
150
+ mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv_parsing.yaml
151
+ mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
152
+ mhp_extension/detectron2/configs/Misc/demo.yaml
153
+ mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
154
+ mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
155
+ mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
156
+ mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
157
+ mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
158
+ mhp_extension/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
159
+ mhp_extension/detectron2/configs/Misc/parsing_finetune_cihp.yaml
160
+ mhp_extension/detectron2/configs/Misc/parsing_inference.yaml
161
+ mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
162
+ mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
163
+ mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
164
+ mhp_extension/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml
165
+ mhp_extension/detectron2/configs/my_Base-RCNN-FPN.yaml
166
+ mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml
167
+ mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml
168
+ mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
169
+ mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml
170
+ mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml
171
+ mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml
172
+ mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
173
+ mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml
174
+ mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
175
+ mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml
176
+ mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml
177
+ mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml
178
+ mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml
179
+ mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml
180
+ mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml
181
+ mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml
182
+ mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml
183
+ mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml
184
+ mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml
185
+ mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml
186
+ mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml
187
+ mhp_extension/detectron2/configs/quick_schedules/README.md
188
+ mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml
189
+ mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml
190
+ mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml
191
+ mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml
192
+ mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml
193
+ mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml
194
+ mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml
195
+ mhp_extension/detectron2/demo/demo.py
196
+ mhp_extension/detectron2/demo/predictor.py
197
+ mhp_extension/detectron2/demo/README.md
198
+ mhp_extension/detectron2/detectron2/checkpoint/c2_model_loading.py
199
+ mhp_extension/detectron2/detectron2/checkpoint/catalog.py
200
+ mhp_extension/detectron2/detectron2/checkpoint/detection_checkpoint.py
201
+ mhp_extension/detectron2/detectron2/checkpoint/__init__.py
202
+ mhp_extension/detectron2/detectron2/config/compat.py
203
+ mhp_extension/detectron2/detectron2/config/config.py
204
+ mhp_extension/detectron2/detectron2/config/defaults.py
205
+ mhp_extension/detectron2/detectron2/config/__init__.py
206
+ mhp_extension/detectron2/detectron2/data/build.py
207
+ mhp_extension/detectron2/detectron2/data/catalog.py
208
+ mhp_extension/detectron2/detectron2/data/common.py
209
+ mhp_extension/detectron2/detectron2/data/dataset_mapper.py
210
+ mhp_extension/detectron2/detectron2/data/datasets/builtin_meta.py
211
+ mhp_extension/detectron2/detectron2/data/datasets/builtin.py
212
+ mhp_extension/detectron2/detectron2/data/datasets/cityscapes.py
213
+ mhp_extension/detectron2/detectron2/data/datasets/coco.py
214
+ mhp_extension/detectron2/detectron2/data/datasets/__init__.py
215
+ mhp_extension/detectron2/detectron2/data/datasets/lvis.py
216
+ mhp_extension/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py
217
+ mhp_extension/detectron2/detectron2/data/datasets/pascal_voc.py
218
+ mhp_extension/detectron2/detectron2/data/datasets/README.md
219
+ mhp_extension/detectron2/detectron2/data/datasets/register_coco.py
220
+ mhp_extension/detectron2/detectron2/data/detection_utils.py
221
+ mhp_extension/detectron2/detectron2/data/__init__.py
222
+ mhp_extension/detectron2/detectron2/data/samplers/distributed_sampler.py
223
+ mhp_extension/detectron2/detectron2/data/samplers/grouped_batch_sampler.py
224
+ mhp_extension/detectron2/detectron2/data/samplers/__init__.py
225
+ mhp_extension/detectron2/detectron2/data/transforms/__init__.py
226
+ mhp_extension/detectron2/detectron2/data/transforms/transform_gen.py
227
+ mhp_extension/detectron2/detectron2/data/transforms/transform.py
228
+ mhp_extension/detectron2/detectron2/engine/defaults.py
229
+ mhp_extension/detectron2/detectron2/engine/hooks.py
230
+ mhp_extension/detectron2/detectron2/engine/__init__.py
231
+ mhp_extension/detectron2/detectron2/engine/launch.py
232
+ mhp_extension/detectron2/detectron2/engine/train_loop.py
233
+ mhp_extension/detectron2/detectron2/evaluation/cityscapes_evaluation.py
234
+ mhp_extension/detectron2/detectron2/evaluation/coco_evaluation.py
235
+ mhp_extension/detectron2/detectron2/evaluation/evaluator.py
236
+ mhp_extension/detectron2/detectron2/evaluation/__init__.py
237
+ mhp_extension/detectron2/detectron2/evaluation/lvis_evaluation.py
238
+ mhp_extension/detectron2/detectron2/evaluation/panoptic_evaluation.py
239
+ mhp_extension/detectron2/detectron2/evaluation/pascal_voc_evaluation.py
240
+ mhp_extension/detectron2/detectron2/evaluation/rotated_coco_evaluation.py
241
+ mhp_extension/detectron2/detectron2/evaluation/sem_seg_evaluation.py
242
+ mhp_extension/detectron2/detectron2/evaluation/testing.py
243
+ mhp_extension/detectron2/detectron2/export/api.py
244
+ mhp_extension/detectron2/detectron2/export/c10.py
245
+ mhp_extension/detectron2/detectron2/export/caffe2_export.py
246
+ mhp_extension/detectron2/detectron2/export/caffe2_inference.py
247
+ mhp_extension/detectron2/detectron2/export/caffe2_modeling.py
248
+ mhp_extension/detectron2/detectron2/export/__init__.py
249
+ mhp_extension/detectron2/detectron2/export/patcher.py
250
+ mhp_extension/detectron2/detectron2/export/README.md
251
+ mhp_extension/detectron2/detectron2/export/shared.py
252
+ mhp_extension/detectron2/detectron2/__init__.py
253
+ mhp_extension/detectron2/detectron2/layers/batch_norm.py
254
+ mhp_extension/detectron2/detectron2/layers/blocks.py
255
+ mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp
256
+ mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu
257
+ mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h
258
+ mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h
259
+ mhp_extension/detectron2/detectron2/layers/csrc/cuda_version.cu
260
+ mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu
261
+ mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu
262
+ mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv.h
263
+ mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
264
+ mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu
265
+ mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h
266
+ mhp_extension/detectron2/detectron2/layers/csrc/README.md
267
+ mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp
268
+ mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu
269
+ mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h
270
+ mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp
271
+ mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu
272
+ mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h
273
+ mhp_extension/detectron2/detectron2/layers/csrc/vision.cpp
274
+ mhp_extension/detectron2/detectron2/layers/deform_conv.py
275
+ mhp_extension/detectron2/detectron2/layers/__init__.py
276
+ mhp_extension/detectron2/detectron2/layers/mask_ops.py
277
+ mhp_extension/detectron2/detectron2/layers/nms.py
278
+ mhp_extension/detectron2/detectron2/layers/roi_align.py
279
+ mhp_extension/detectron2/detectron2/layers/roi_align_rotated.py
280
+ mhp_extension/detectron2/detectron2/layers/rotated_boxes.py
281
+ mhp_extension/detectron2/detectron2/layers/shape_spec.py
282
+ mhp_extension/detectron2/detectron2/layers/wrappers.py
283
+ mhp_extension/detectron2/detectron2/modeling/anchor_generator.py
284
+ mhp_extension/detectron2/detectron2/modeling/backbone/backbone.py
285
+ mhp_extension/detectron2/detectron2/modeling/backbone/build.py
286
+ mhp_extension/detectron2/detectron2/modeling/backbone/fpn.py
287
+ mhp_extension/detectron2/detectron2/modeling/backbone/__init__.py
288
+ mhp_extension/detectron2/detectron2/modeling/backbone/resnet.py
289
+ mhp_extension/detectron2/detectron2/modeling/box_regression.py
290
+ mhp_extension/detectron2/detectron2/modeling/__init__.py
291
+ mhp_extension/detectron2/detectron2/modeling/matcher.py
292
+ mhp_extension/detectron2/detectron2/modeling/meta_arch/build.py
293
+ mhp_extension/detectron2/detectron2/modeling/meta_arch/__init__.py
294
+ mhp_extension/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py
295
+ mhp_extension/detectron2/detectron2/modeling/meta_arch/rcnn.py
296
+ mhp_extension/detectron2/detectron2/modeling/meta_arch/retinanet.py
297
+ mhp_extension/detectron2/detectron2/modeling/meta_arch/semantic_seg.py
298
+ mhp_extension/detectron2/detectron2/modeling/poolers.py
299
+ mhp_extension/detectron2/detectron2/modeling/postprocessing.py
300
+ mhp_extension/detectron2/detectron2/modeling/proposal_generator/build.py
301
+ mhp_extension/detectron2/detectron2/modeling/proposal_generator/__init__.py
302
+ mhp_extension/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py
303
+ mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn_outputs.py
304
+ mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn.py
305
+ mhp_extension/detectron2/detectron2/modeling/proposal_generator/rrpn.py
306
+ mhp_extension/detectron2/detectron2/modeling/roi_heads/box_head.py
307
+ mhp_extension/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py
308
+ mhp_extension/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py
309
+ mhp_extension/detectron2/detectron2/modeling/roi_heads/__init__.py
310
+ mhp_extension/detectron2/detectron2/modeling/roi_heads/keypoint_head.py
311
+ mhp_extension/detectron2/detectron2/modeling/roi_heads/mask_head.py
312
+ mhp_extension/detectron2/detectron2/modeling/roi_heads/roi_heads.py
313
+ mhp_extension/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py
314
+ mhp_extension/detectron2/detectron2/modeling/sampling.py
315
+ mhp_extension/detectron2/detectron2/modeling/test_time_augmentation.py
316
+ mhp_extension/detectron2/detectron2/model_zoo/__init__.py
317
+ mhp_extension/detectron2/detectron2/model_zoo/model_zoo.py
318
+ mhp_extension/detectron2/detectron2/solver/build.py
319
+ mhp_extension/detectron2/detectron2/solver/__init__.py
320
+ mhp_extension/detectron2/detectron2/solver/lr_scheduler.py
321
+ mhp_extension/detectron2/detectron2/structures/boxes.py
322
+ mhp_extension/detectron2/detectron2/structures/image_list.py
323
+ mhp_extension/detectron2/detectron2/structures/__init__.py
324
+ mhp_extension/detectron2/detectron2/structures/instances.py
325
+ mhp_extension/detectron2/detectron2/structures/keypoints.py
326
+ mhp_extension/detectron2/detectron2/structures/masks.py
327
+ mhp_extension/detectron2/detectron2/structures/rotated_boxes.py
328
+ mhp_extension/detectron2/detectron2/utils/analysis.py
329
+ mhp_extension/detectron2/detectron2/utils/collect_env.py
330
+ mhp_extension/detectron2/detectron2/utils/colormap.py
331
+ mhp_extension/detectron2/detectron2/utils/comm.py
332
+ mhp_extension/detectron2/detectron2/utils/env.py
333
+ mhp_extension/detectron2/detectron2/utils/events.py
334
+ mhp_extension/detectron2/detectron2/utils/__init__.py
335
+ mhp_extension/detectron2/detectron2/utils/logger.py
336
+ mhp_extension/detectron2/detectron2/utils/memory.py
337
+ mhp_extension/detectron2/detectron2/utils/README.md
338
+ mhp_extension/detectron2/detectron2/utils/registry.py
339
+ mhp_extension/detectron2/detectron2/utils/serialize.py
340
+ mhp_extension/detectron2/detectron2/utils/video_visualizer.py
341
+ mhp_extension/detectron2/detectron2/utils/visualizer.py
342
+ mhp_extension/detectron2/dev/linter.sh
343
+ mhp_extension/detectron2/dev/packaging/build_all_wheels.sh
344
+ mhp_extension/detectron2/dev/packaging/build_wheel.sh
345
+ mhp_extension/detectron2/dev/packaging/gen_wheel_index.sh
346
+ mhp_extension/detectron2/dev/packaging/pkg_helpers.bash
347
+ mhp_extension/detectron2/dev/packaging/README.md
348
+ mhp_extension/detectron2/dev/parse_results.sh
349
+ mhp_extension/detectron2/dev/README.md
350
+ mhp_extension/detectron2/dev/run_inference_tests.sh
351
+ mhp_extension/detectron2/dev/run_instant_tests.sh
352
+ mhp_extension/detectron2/docker/docker-compose.yml
353
+ mhp_extension/detectron2/docker/Dockerfile
354
+ mhp_extension/detectron2/docker/Dockerfile-circleci
355
+ mhp_extension/detectron2/docker/README.md
356
+ mhp_extension/detectron2/docs/conf.py
357
+ mhp_extension/detectron2/docs/.gitignore
358
+ mhp_extension/detectron2/docs/index.rst
359
+ mhp_extension/detectron2/docs/Makefile
360
+ mhp_extension/detectron2/docs/modules/checkpoint.rst
361
+ mhp_extension/detectron2/docs/modules/config.rst
362
+ mhp_extension/detectron2/docs/modules/data.rst
363
+ mhp_extension/detectron2/docs/modules/engine.rst
364
+ mhp_extension/detectron2/docs/modules/evaluation.rst
365
+ mhp_extension/detectron2/docs/modules/export.rst
366
+ mhp_extension/detectron2/docs/modules/index.rst
367
+ mhp_extension/detectron2/docs/modules/layers.rst
368
+ mhp_extension/detectron2/docs/modules/modeling.rst
369
+ mhp_extension/detectron2/docs/modules/model_zoo.rst
370
+ mhp_extension/detectron2/docs/modules/solver.rst
371
+ mhp_extension/detectron2/docs/modules/structures.rst
372
+ mhp_extension/detectron2/docs/modules/utils.rst
373
+ mhp_extension/detectron2/docs/notes/benchmarks.md
374
+ mhp_extension/detectron2/docs/notes/changelog.md
375
+ mhp_extension/detectron2/docs/notes/compatibility.md
376
+ mhp_extension/detectron2/docs/notes/contributing.md
377
+ mhp_extension/detectron2/docs/notes/index.rst
378
+ mhp_extension/detectron2/docs/README.md
379
+ mhp_extension/detectron2/docs/tutorials/builtin_datasets.md
380
+ mhp_extension/detectron2/docs/tutorials/configs.md
381
+ mhp_extension/detectron2/docs/tutorials/data_loading.md
382
+ mhp_extension/detectron2/docs/tutorials/datasets.md
383
+ mhp_extension/detectron2/docs/tutorials/deployment.md
384
+ mhp_extension/detectron2/docs/tutorials/evaluation.md
385
+ mhp_extension/detectron2/docs/tutorials/extend.md
386
+ mhp_extension/detectron2/docs/tutorials/getting_started.md
387
+ mhp_extension/detectron2/docs/tutorials/index.rst
388
+ mhp_extension/detectron2/docs/tutorials/install.md
389
+ mhp_extension/detectron2/docs/tutorials/models.md
390
+ mhp_extension/detectron2/docs/tutorials/README.md
391
+ mhp_extension/detectron2/docs/tutorials/training.md
392
+ mhp_extension/detectron2/docs/tutorials/write-models.md
393
+ mhp_extension/detectron2/.flake8
394
+ mhp_extension/detectron2/GETTING_STARTED.md
395
+ mhp_extension/detectron2/.gitignore
396
+ mhp_extension/detectron2/INSTALL.md
397
+ mhp_extension/detectron2/LICENSE
398
+ mhp_extension/detectron2/MODEL_ZOO.md
399
+ mhp_extension/detectron2/projects/DensePose/apply_net.py
400
+ mhp_extension/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml
401
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml
402
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml
403
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml
404
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml
405
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml
406
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml
407
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml
408
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml
409
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml
410
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml
411
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml
412
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml
413
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml
414
+ mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml
415
+ mhp_extension/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml
416
+ mhp_extension/detectron2/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml
417
+ mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml
418
+ mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml
419
+ mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml
420
+ mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml
421
+ mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml
422
+ mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml
423
+ mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml
424
+ mhp_extension/detectron2/projects/DensePose/densepose/config.py
425
+ mhp_extension/detectron2/projects/DensePose/densepose/data/build.py
426
+ mhp_extension/detectron2/projects/DensePose/densepose/data/dataset_mapper.py
427
+ mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py
428
+ mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py
429
+ mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/__init__.py
430
+ mhp_extension/detectron2/projects/DensePose/densepose/data/__init__.py
431
+ mhp_extension/detectron2/projects/DensePose/densepose/data/structures.py
432
+ mhp_extension/detectron2/projects/DensePose/densepose/densepose_coco_evaluation.py
433
+ mhp_extension/detectron2/projects/DensePose/densepose/densepose_head.py
434
+ mhp_extension/detectron2/projects/DensePose/densepose/evaluator.py
435
+ mhp_extension/detectron2/projects/DensePose/densepose/__init__.py
436
+ mhp_extension/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py
437
+ mhp_extension/detectron2/projects/DensePose/densepose/roi_head.py
438
+ mhp_extension/detectron2/projects/DensePose/densepose/utils/dbhelper.py
439
+ mhp_extension/detectron2/projects/DensePose/densepose/utils/logger.py
440
+ mhp_extension/detectron2/projects/DensePose/densepose/utils/transform.py
441
+ mhp_extension/detectron2/projects/DensePose/densepose/vis/base.py
442
+ mhp_extension/detectron2/projects/DensePose/densepose/vis/bounding_box.py
443
+ mhp_extension/detectron2/projects/DensePose/densepose/vis/densepose.py
444
+ mhp_extension/detectron2/projects/DensePose/densepose/vis/extractor.py
445
+ mhp_extension/detectron2/projects/DensePose/dev/README.md
446
+ mhp_extension/detectron2/projects/DensePose/dev/run_inference_tests.sh
447
+ mhp_extension/detectron2/projects/DensePose/dev/run_instant_tests.sh
448
+ mhp_extension/detectron2/projects/DensePose/doc/GETTING_STARTED.md
449
+ mhp_extension/detectron2/projects/DensePose/doc/MODEL_ZOO.md
450
+ mhp_extension/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md
451
+ mhp_extension/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md
452
+ mhp_extension/detectron2/projects/DensePose/query_db.py
453
+ mhp_extension/detectron2/projects/DensePose/README.md
454
+ mhp_extension/detectron2/projects/DensePose/tests/common.py
455
+ mhp_extension/detectron2/projects/DensePose/tests/test_model_e2e.py
456
+ mhp_extension/detectron2/projects/DensePose/tests/test_setup.py
457
+ mhp_extension/detectron2/projects/DensePose/tests/test_structures.py
458
+ mhp_extension/detectron2/projects/DensePose/train_net.py
459
+ mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml
460
+ mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml
461
+ mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml
462
+ mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml
463
+ mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_parsing.yaml
464
+ mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_parsing.yaml
465
+ mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml
466
+ mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml
467
+ mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_50_FPN_1x_coco.yaml
468
+ mhp_extension/detectron2/projects/PointRend/finetune_net.py
469
+ mhp_extension/detectron2/projects/PointRend/logs/hadoop.kylin.libdfs.log
470
+ mhp_extension/detectron2/projects/PointRend/point_rend/coarse_mask_head.py
471
+ mhp_extension/detectron2/projects/PointRend/point_rend/color_augmentation.py
472
+ mhp_extension/detectron2/projects/PointRend/point_rend/config.py
473
+ mhp_extension/detectron2/projects/PointRend/point_rend/dataset_mapper.py
474
+ mhp_extension/detectron2/projects/PointRend/point_rend/__init__.py
475
+ mhp_extension/detectron2/projects/PointRend/point_rend/point_features.py
476
+ mhp_extension/detectron2/projects/PointRend/point_rend/point_head.py
477
+ mhp_extension/detectron2/projects/PointRend/point_rend/roi_heads.py
478
+ mhp_extension/detectron2/projects/PointRend/point_rend/semantic_seg.py
479
+ mhp_extension/detectron2/projects/PointRend/README.md
480
+ mhp_extension/detectron2/projects/PointRend/run.sh
481
+ mhp_extension/detectron2/projects/PointRend/train_net.py
482
+ mhp_extension/detectron2/projects/README.md
483
+ mhp_extension/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml
484
+ mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml
485
+ mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml
486
+ mhp_extension/detectron2/projects/TensorMask/README.md
487
+ mhp_extension/detectron2/projects/TensorMask/setup.py
488
+ mhp_extension/detectron2/projects/TensorMask/tensormask/arch.py
489
+ mhp_extension/detectron2/projects/TensorMask/tensormask/config.py
490
+ mhp_extension/detectron2/projects/TensorMask/tensormask/__init__.py
491
+ mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu
492
+ mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h
493
+ mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp
494
+ mhp_extension/detectron2/projects/TensorMask/tensormask/layers/__init__.py
495
+ mhp_extension/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py
496
+ mhp_extension/detectron2/projects/TensorMask/tests/__init__.py
497
+ mhp_extension/detectron2/projects/TensorMask/tests/test_swap_align2nat.py
498
+ mhp_extension/detectron2/projects/TensorMask/train_net.py
499
+ mhp_extension/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml
500
+ mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml
501
+ mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml
502
+ mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml
503
+ mhp_extension/detectron2/projects/TridentNet/README.md
504
+ mhp_extension/detectron2/projects/TridentNet/train_net.py
505
+ mhp_extension/detectron2/projects/TridentNet/tridentnet/config.py
506
+ mhp_extension/detectron2/projects/TridentNet/tridentnet/__init__.py
507
+ mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_backbone.py
508
+ mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_conv.py
509
+ mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py
510
+ mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rpn.py
511
+ mhp_extension/detectron2/README.md
512
+ mhp_extension/detectron2/setup.cfg
513
+ mhp_extension/detectron2/setup.py
514
+ mhp_extension/detectron2/tests/data/__init__.py
515
+ mhp_extension/detectron2/tests/data/test_coco.py
516
+ mhp_extension/detectron2/tests/data/test_detection_utils.py
517
+ mhp_extension/detectron2/tests/data/test_rotation_transform.py
518
+ mhp_extension/detectron2/tests/data/test_sampler.py
519
+ mhp_extension/detectron2/tests/data/test_transforms.py
520
+ mhp_extension/detectron2/tests/__init__.py
521
+ mhp_extension/detectron2/tests/layers/__init__.py
522
+ mhp_extension/detectron2/tests/layers/test_mask_ops.py
523
+ mhp_extension/detectron2/tests/layers/test_nms_rotated.py
524
+ mhp_extension/detectron2/tests/layers/test_roi_align.py
525
+ mhp_extension/detectron2/tests/layers/test_roi_align_rotated.py
526
+ mhp_extension/detectron2/tests/modeling/__init__.py
527
+ mhp_extension/detectron2/tests/modeling/test_anchor_generator.py
528
+ mhp_extension/detectron2/tests/modeling/test_box2box_transform.py
529
+ mhp_extension/detectron2/tests/modeling/test_fast_rcnn.py
530
+ mhp_extension/detectron2/tests/modeling/test_model_e2e.py
531
+ mhp_extension/detectron2/tests/modeling/test_roi_heads.py
532
+ mhp_extension/detectron2/tests/modeling/test_roi_pooler.py
533
+ mhp_extension/detectron2/tests/modeling/test_rpn.py
534
+ mhp_extension/detectron2/tests/README.md
535
+ mhp_extension/detectron2/tests/structures/__init__.py
536
+ mhp_extension/detectron2/tests/structures/test_boxes.py
537
+ mhp_extension/detectron2/tests/structures/test_imagelist.py
538
+ mhp_extension/detectron2/tests/structures/test_instances.py
539
+ mhp_extension/detectron2/tests/structures/test_rotated_boxes.py
540
+ mhp_extension/detectron2/tests/test_checkpoint.py
541
+ mhp_extension/detectron2/tests/test_config.py
542
+ mhp_extension/detectron2/tests/test_export_caffe2.py
543
+ mhp_extension/detectron2/tests/test_model_analysis.py
544
+ mhp_extension/detectron2/tests/test_model_zoo.py
545
+ mhp_extension/detectron2/tests/test_visualizer.py
546
+ mhp_extension/detectron2/tools/analyze_model.py
547
+ mhp_extension/detectron2/tools/benchmark.py
548
+ mhp_extension/detectron2/tools/convert-torchvision-to-d2.py
549
+ mhp_extension/detectron2/tools/deploy/caffe2_converter.py
550
+ mhp_extension/detectron2/tools/deploy/caffe2_mask_rcnn.cpp
551
+ mhp_extension/detectron2/tools/deploy/README.md
552
+ mhp_extension/detectron2/tools/deploy/torchscript_traced_mask_rcnn.cpp
553
+ mhp_extension/detectron2/tools/finetune_net.py
554
+ mhp_extension/detectron2/tools/inference.sh
555
+ mhp_extension/detectron2/tools/plain_train_net.py
556
+ mhp_extension/detectron2/tools/README.md
557
+ mhp_extension/detectron2/tools/run.sh
558
+ mhp_extension/detectron2/tools/train_net.py
559
+ mhp_extension/detectron2/tools/visualize_data.py
560
+ mhp_extension/detectron2/tools/visualize_json_results.py
561
+ mhp_extension/global_local_parsing/global_local_datasets.py
562
+ mhp_extension/global_local_parsing/global_local_evaluate.py
563
+ mhp_extension/global_local_parsing/global_local_train.py
564
+ mhp_extension/global_local_parsing/make_id_list.py
565
+ mhp_extension/logits_fusion.py
566
+ mhp_extension/make_crop_and_mask_w_mask_nms.py
567
+ mhp_extension/README.md
568
+ mhp_extension/scripts/make_coco_style_annotation.sh
569
+ mhp_extension/scripts/make_crop.sh
570
+ mhp_extension/scripts/parsing_fusion.sh
571
+ modules/bn.py
572
+ modules/deeplab.py
573
+ modules/dense.py
574
+ modules/functions.py
575
+ modules/__init__.py
576
+ modules/misc.py
577
+ modules/residual.py
578
+ modules/src/checks.h
579
+ modules/src/inplace_abn.cpp
580
+ modules/src/inplace_abn_cpu.cpp
581
+ modules/src/inplace_abn_cuda.cu
582
+ modules/src/inplace_abn_cuda_half.cu
583
+ modules/src/inplace_abn.h
584
+ modules/src/utils/checks.h
585
+ modules/src/utils/common.h
586
+ modules/src/utils/cuda.cuh
587
+ networks/AugmentCE2P.py
588
+ networks/backbone/mobilenetv2.py
589
+ networks/backbone/resnet.py
590
+ networks/backbone/resnext.py
591
+ networks/context_encoding/aspp.py
592
+ networks/context_encoding/ocnet.py
593
+ networks/context_encoding/psp.py
594
+ networks/__init__.py
595
+ README.md
596
+ requirements.txt
597
+ simple_extractor.py
598
+ training_code/MVANet/README.org
599
+ train.py
600
+ utils/consistency_loss.py
601
+ utils/criterion.py
602
+ utils/encoding.py
603
+ utils/__init__.py
604
+ utils/kl_loss.py
605
+ utils/lovasz_softmax.py
606
+ utils/miou.py
607
+ utils/schp.py
608
+ utils/soft_dice_loss.py
609
+ utils/transforms.py
610
+ utils/warmup_scheduler.py
611
+ #+end_src
+
+ * List of files to remove
+ #+begin_src conf :tangle ./rm.txt
+ ComfyUI_MVANet/__pycache__/__init__.cpython-310.pyc
+ ComfyUI_MVANet/#README.org#
+ ComfyUI_MVANet/.#README.org
+ ComfyUI_MVANet/README.org~
+ ComfyUI_MVANet/.README.org.~undo-tree~
+ #main.org#
+ .#main.org
+ main.org~
+ .main.org.~undo-tree~
+ .README.md.~undo-tree~
+ ComfyUI_MVANet/.#README.org
+ ComfyUI_AEMatter/__pycache__/__init__.cpython-310.pyc
+ ComfyUI_AEMatter/AEMatter.class.py
+ ComfyUI_AEMatter/AEMatter.execute.py
+ ComfyUI_AEMatter/AEMatter.function.py
+ ComfyUI_AEMatter/AEMatter.import.py
+ ComfyUI_MVANet/MVANet_inference.class.py
+ ComfyUI_MVANet/MVANet_inference.execute.py
+ ComfyUI_MVANet/MVANet_inference.function.py
+ ComfyUI_MVANet/MVANet_inference.import.py
+ ComfyUI_MVANet/MVANet_inference.unify.sh
+ ComfyUI_AEMatter/AEMatter.unify.sh
+ git_add.txt
+ git_lfs_track.txt
+ gitignore.txt
+ rm.txt
+ work.sh
+ #+end_src
+
+ * List of patterns to ignore
+ #+begin_src conf :tangle ./gitignore.txt
+ log/
+ pretrain_model/
+ commit_and_push.sh
+ #+end_src
+
+ * Main script to do everything
+ #+begin_src sh :shebang #!/bin/sh :results output :tangle ./work.sh
+ do_ignore(){
+     'sed' 's@^@/@g' './rm.txt';
+     'cat' './gitignore.txt';
+ }
+
+ do_add(){
+     'sed' 's@^@("git" "lfs" "track" "./@g;s@$@");@g' './git_lfs_track.txt' ;
+     'cat' './git_add.txt' './git_lfs_track.txt' | \
+         'sed' 's@^@("git" "add" "./@g;s@$@");@g' ;
+ }
+
+ do_rm(){
+     'sed' 's@^@("rm" "-vf" "--" "./@g ; s@$@");@g' './rm.txt' ;
+ }
+
+ all_commands(){
+     do_add
+     do_rm
+ }
+
+ do_all(){
+     do_ignore > './.gitignore'
+     all_commands | sh
+ }
+
+ do_all
+ #+end_src
+
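To make the generated command stream concrete, here is a sketch (illustrative only, not tangled by this commit) of what do_add and do_rm expand a single entry into before the output is piped to sh:

#+begin_src sh
#!/bin/sh
# For "checkpoints/MVANet/skin.pth" in git_lfs_track.txt, do_add emits
# these subshell commands (each runs in its own subshell when fed to sh):
("git" "lfs" "track" "./checkpoints/MVANet/skin.pth");
("git" "add" "./checkpoints/MVANet/skin.pth");

# For "work.sh" in rm.txt, do_rm emits:
("rm" "-vf" "--" "./work.sh");
#+end_src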
training_code/MVANet/README.org ADDED
@@ -0,0 +1,2338 @@
+ * Requirements
+ #+begin_src conf :tangle ./requirements.txt
+ einops
+ pillow
+ prodigyopt
+ tensorboard
+ timm
+ torch
+ torchvision
+ #+end_src
+
+ * Download trained model
+ #+begin_src sh :shebang #!/bin/sh :results output :tangle ./download.sh
+ "efficient_download.sh" \
+     'https://huggingface.co/aravindhv10/Self-Correction-Human-Parsing/resolve/main/checkpoints/Model_80.pth' \
+     'Model_80.pth' \
+     '6ca28df33ba8476ac13be329a1b1b8b390da5d8042638fb124df3c067c2fe45bccde4366643b830066cbe0164ddbb978a1987a398b4a987f99d908903b44774f' \
+     "${HOME}/GITHUB/aravind-h-v/dreambooth_experiments/cloth_segmentation/MVANet_Train/pretrained_model/Model_80.pth" \
+     ;
+ #+end_src
+
+ * Swin code
+
+ ** swin.import.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./swin.import.py
+ import os
+ os.environ["CUDA_VISIBLE_DEVICES"] = '0'
+ #+end_src
+
+ ** swin.import.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./swin.import.py
+ import numpy as np
+ #+end_src
+
+ ** swin.import.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./swin.import.py
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torch.utils.checkpoint as checkpoint
+ #+end_src
+
+ ** swin.import.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./swin.import.py
+ from timm.models import load_checkpoint
+ from timm.models.layers import DropPath
+ from timm.models.layers import to_2tuple
+ from timm.models.layers import trunc_normal_
+
+ # from mmdet.utils import get_root_logger
+ #+end_src
+
+ ** swin.function.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./swin.function.py
+ def window_partition(x, window_size):
+     """
+     Args:
+         x: (B, H, W, C)
+         window_size (int): window size
+
+     Returns:
+         windows: (num_windows*B, window_size, window_size, C)
+     """
+     B, H, W, C = x.shape
+     x = x.view(B, H // window_size, window_size, W // window_size, window_size,
+                C)
+     windows = x.permute(0, 1, 3, 2, 4,
+                         5).contiguous().view(-1, window_size, window_size, C)
+     return windows
+
+
+ def window_reverse(windows, window_size, H, W):
+     """
+     Args:
+         windows: (num_windows*B, window_size, window_size, C)
+         window_size (int): Window size
+         H (int): Height of image
+         W (int): Width of image
+
+     Returns:
+         x: (B, H, W, C)
+     """
+     B = int(windows.shape[0] / (H * W / window_size / window_size))
+     x = windows.view(B, H // window_size, W // window_size, window_size,
+                      window_size, -1)
+     x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
+     return x
+
+
+ def SwinT(pretrained=True):
+     model = SwinTransformer(embed_dim=96,
+                             depths=[2, 2, 6, 2],
+                             num_heads=[3, 6, 12, 24],
+                             window_size=7)
+     # if pretrained is True:
+     #     model.load_state_dict(torch.load(
+     #         'data/backbone_ckpt/swin_tiny_patch4_window7_224.pth',
+     #         map_location='cpu')['model'],
+     #         strict=False)
+
+     return model
+
+
+ def SwinS(pretrained=True):
+     model = SwinTransformer(embed_dim=96,
+                             depths=[2, 2, 18, 2],
+                             num_heads=[3, 6, 12, 24],
+                             window_size=7)
+     # if pretrained is True:
+     #     model.load_state_dict(torch.load(
+     #         'data/backbone_ckpt/swin_small_patch4_window7_224.pth',
+     #         map_location='cpu')['model'],
+     #         strict=False)
+
+     return model
+
+
+ def SwinB(pretrained=True):
+     model = SwinTransformer(embed_dim=128,
+                             depths=[2, 2, 18, 2],
+                             num_heads=[4, 8, 16, 32],
+                             window_size=12)
+     # if pretrained is True:
+     #     model.load_state_dict(
+     #         torch.load('./swin_base_patch4_window12_384_22kto1k.pth',
+     #                    map_location='cpu')['model'],
+     #         strict=False)
+
+     return model
+
+
+ def SwinL(pretrained=True):
+     model = SwinTransformer(embed_dim=192,
+                             depths=[2, 2, 18, 2],
+                             num_heads=[6, 12, 24, 48],
+                             window_size=12)
+     # if pretrained is True:
+     #     model.load_state_dict(torch.load(
+     #         'data/backbone_ckpt/swin_large_patch4_window12_384_22kto1k.pth',
+     #         map_location='cpu')['model'],
+     #         strict=False)
+
+     return model
+ #+end_src
145
+
146
+ ** swin.class.py
147
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./swin.class.py
148
+ class Mlp(nn.Module):
149
+ """ Multilayer perceptron."""
150
+
151
+ def __init__(self,
152
+ in_features,
153
+ hidden_features=None,
154
+ out_features=None,
155
+ act_layer=nn.GELU,
156
+ drop=0.):
157
+ super().__init__()
158
+ out_features = out_features or in_features
159
+ hidden_features = hidden_features or in_features
160
+ self.fc1 = nn.Linear(in_features, hidden_features)
161
+ self.act = act_layer()
162
+ self.fc2 = nn.Linear(hidden_features, out_features)
163
+ self.drop = nn.Dropout(drop)
164
+
165
+ def forward(self, x):
166
+ x = self.fc1(x)
167
+ x = self.act(x)
168
+ x = self.drop(x)
169
+ x = self.fc2(x)
170
+ x = self.drop(x)
171
+ return x
172
+
173
+
174
+ class WindowAttention(nn.Module):
175
+ """ Window based multi-head self attention (W-MSA) module with relative position bias.
176
+ It supports both shifted and non-shifted windows.
177
+
178
+ Args:
179
+ dim (int): Number of input channels.
180
+ window_size (tuple[int]): The height and width of the window.
181
+ num_heads (int): Number of attention heads.
182
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
183
+ qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
184
+ attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
185
+ proj_drop (float, optional): Dropout ratio of output. Default: 0.0
186
+ """
187
+
188
+ def __init__(self,
189
+ dim,
190
+ window_size,
191
+ num_heads,
192
+ qkv_bias=True,
193
+ qk_scale=None,
194
+ attn_drop=0.,
195
+ proj_drop=0.):
196
+
197
+ super().__init__()
198
+ self.dim = dim
199
+ self.window_size = window_size # Wh, Ww
200
+ self.num_heads = num_heads
201
+ head_dim = dim // num_heads
202
+ self.scale = qk_scale or head_dim**-0.5
203
+
204
+ # define a parameter table of relative position bias
205
+ self.relative_position_bias_table = nn.Parameter(
206
+ torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1),
207
+ num_heads)) # 2*Wh-1 * 2*Ww-1, nH
208
+
209
+ # get pair-wise relative position index for each token inside the window
210
+ coords_h = torch.arange(self.window_size[0])
211
+ coords_w = torch.arange(self.window_size[1])
212
+ coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww
213
+ coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww
214
+ relative_coords = coords_flatten[:, :,
215
+ None] - coords_flatten[:,
216
+ None, :] # 2, Wh*Ww, Wh*Ww
217
+ relative_coords = relative_coords.permute(
218
+ 1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2
219
+ relative_coords[:, :,
220
+ 0] += self.window_size[0] - 1 # shift to start from 0
221
+ relative_coords[:, :, 1] += self.window_size[1] - 1
222
+ relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
223
+ relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww
224
+ self.register_buffer("relative_position_index",
225
+ relative_position_index)
226
+
227
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
228
+ self.attn_drop = nn.Dropout(attn_drop)
229
+ self.proj = nn.Linear(dim, dim)
230
+ self.proj_drop = nn.Dropout(proj_drop)
231
+
232
+ trunc_normal_(self.relative_position_bias_table, std=.02)
233
+ self.softmax = nn.Softmax(dim=-1)
234
+
235
+ def forward(self, x, mask=None):
236
+ """ Forward function.
237
+
238
+ Args:
239
+ x: input features with shape of (num_windows*B, N, C)
240
+ mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
241
+ """
242
+ B_, N, C = x.shape
243
+ qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
244
+ C // self.num_heads).permute(2, 0, 3, 1, 4)
245
+ q, k, v = qkv[0], qkv[1], qkv[
246
+ 2] # make torchscript happy (cannot use tensor as tuple)
247
+
248
+ q = q * self.scale
249
+ attn = (q @ k.transpose(-2, -1))
250
+
251
+ relative_position_bias = self.relative_position_bias_table[
252
+ self.relative_position_index.view(-1)].view(
253
+ self.window_size[0] * self.window_size[1],
254
+ self.window_size[0] * self.window_size[1],
255
+ -1) # Wh*Ww,Wh*Ww,nH
256
+ relative_position_bias = relative_position_bias.permute(
257
+ 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
258
+ attn = attn + relative_position_bias.unsqueeze(0)
259
+
260
+ if mask is not None:
261
+ nW = mask.shape[0]
262
+ attn = attn.view(B_ // nW, nW, self.num_heads, N,
263
+ N) + mask.unsqueeze(1).unsqueeze(0)
264
+ attn = attn.view(-1, self.num_heads, N, N)
265
+ attn = self.softmax(attn)
266
+ else:
267
+ attn = self.softmax(attn)
268
+
269
+ attn = self.attn_drop(attn)
270
+
271
+ x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
272
+ x = self.proj(x)
273
+ x = self.proj_drop(x)
274
+ return x
275
+
276
+
277
+ class SwinTransformerBlock(nn.Module):
278
+ """ Swin Transformer Block.
279
+
280
+ Args:
281
+ dim (int): Number of input channels.
282
+ num_heads (int): Number of attention heads.
283
+ window_size (int): Window size.
284
+ shift_size (int): Shift size for SW-MSA.
285
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
286
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
287
+ qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
288
+ drop (float, optional): Dropout rate. Default: 0.0
289
+ attn_drop (float, optional): Attention dropout rate. Default: 0.0
290
+ drop_path (float, optional): Stochastic depth rate. Default: 0.0
291
+ act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
292
+ norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
293
+ """
294
+
295
+ def __init__(self,
296
+ dim,
297
+ num_heads,
298
+ window_size=7,
299
+ shift_size=0,
300
+ mlp_ratio=4.,
301
+ qkv_bias=True,
302
+ qk_scale=None,
303
+ drop=0.,
304
+ attn_drop=0.,
305
+ drop_path=0.,
306
+ act_layer=nn.GELU,
307
+ norm_layer=nn.LayerNorm):
308
+ super().__init__()
309
+ self.dim = dim
310
+ self.num_heads = num_heads
311
+ self.window_size = window_size
312
+ self.shift_size = shift_size
313
+ self.mlp_ratio = mlp_ratio
314
+ assert 0 <= self.shift_size < self.window_size, "shift_size must be in 0-window_size"
315
+
316
+ self.norm1 = norm_layer(dim)
317
+ self.attn = WindowAttention(dim,
318
+ window_size=to_2tuple(self.window_size),
319
+ num_heads=num_heads,
320
+ qkv_bias=qkv_bias,
321
+ qk_scale=qk_scale,
322
+ attn_drop=attn_drop,
323
+ proj_drop=drop)
324
+
325
+ self.drop_path = DropPath(
326
+ drop_path) if drop_path > 0. else nn.Identity()
327
+ self.norm2 = norm_layer(dim)
328
+ mlp_hidden_dim = int(dim * mlp_ratio)
329
+ self.mlp = Mlp(in_features=dim,
330
+ hidden_features=mlp_hidden_dim,
331
+ act_layer=act_layer,
332
+ drop=drop)
333
+
334
+ self.H = None
335
+ self.W = None
336
+
337
+ def forward(self, x, mask_matrix):
338
+ """ Forward function.
339
+
340
+ Args:
341
+ x: Input feature, tensor size (B, H*W, C).
342
+ H, W: Spatial resolution of the input feature.
343
+ mask_matrix: Attention mask for cyclic shift.
344
+ """
345
+ B, L, C = x.shape
346
+ H, W = self.H, self.W
347
+ assert L == H * W, "input feature has wrong size"
348
+
349
+ shortcut = x
350
+ x = self.norm1(x)
351
+ x = x.view(B, H, W, C)
352
+
353
+ # pad feature maps to multiples of window size
354
+ pad_l = pad_t = 0
355
+ pad_r = (self.window_size - W % self.window_size) % self.window_size
356
+ pad_b = (self.window_size - H % self.window_size) % self.window_size
357
+ x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b))
358
+ _, Hp, Wp, _ = x.shape
359
+
360
+ # cyclic shift
361
+ if self.shift_size > 0:
362
+ shifted_x = torch.roll(x,
363
+ shifts=(-self.shift_size, -self.shift_size),
364
+ dims=(1, 2))
365
+ attn_mask = mask_matrix
366
+ else:
367
+ shifted_x = x
368
+ attn_mask = None
369
+
370
+ # partition windows
371
+ x_windows = window_partition(
372
+ shifted_x, self.window_size) # nW*B, window_size, window_size, C
373
+ x_windows = x_windows.view(-1, self.window_size * self.window_size,
374
+ C) # nW*B, window_size*window_size, C
375
+
376
+ # W-MSA/SW-MSA
377
+ attn_windows = self.attn(
378
+ x_windows, mask=attn_mask) # nW*B, window_size*window_size, C
379
+
380
+ # merge windows
381
+ attn_windows = attn_windows.view(-1, self.window_size,
382
+ self.window_size, C)
383
+ shifted_x = window_reverse(attn_windows, self.window_size, Hp,
384
+ Wp) # B H' W' C
385
+
386
+ # reverse cyclic shift
387
+ if self.shift_size > 0:
388
+ x = torch.roll(shifted_x,
389
+ shifts=(self.shift_size, self.shift_size),
390
+ dims=(1, 2))
391
+ else:
392
+ x = shifted_x
393
+
394
+ if pad_r > 0 or pad_b > 0:
395
+ x = x[:, :H, :W, :].contiguous()
396
+
397
+ x = x.view(B, H * W, C)
398
+
399
+ # FFN
400
+ x = shortcut + self.drop_path(x)
401
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
402
+
403
+ return x
404
+
405
+
406
+ class PatchMerging(nn.Module):
407
+ """ Patch Merging Layer
408
+
409
+ Args:
410
+ dim (int): Number of input channels.
411
+ norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
412
+ """
413
+
414
+ def __init__(self, dim, norm_layer=nn.LayerNorm):
415
+ super().__init__()
416
+ self.dim = dim
417
+ self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
418
+ self.norm = norm_layer(4 * dim)
419
+
420
+ def forward(self, x, H, W):
421
+ """ Forward function.
422
+
423
+ Args:
424
+ x: Input feature, tensor size (B, H*W, C).
425
+ H, W: Spatial resolution of the input feature.
426
+ """
427
+ B, L, C = x.shape
428
+ assert L == H * W, "input feature has wrong size"
429
+
430
+ x = x.view(B, H, W, C)
431
+
432
+ # padding
433
+ pad_input = (H % 2 == 1) or (W % 2 == 1)
434
+ if pad_input:
435
+ x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2))
436
+
437
+ x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C
438
+ x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C
439
+ x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C
440
+ x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C
441
+ x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C
442
+ x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C
443
+
444
+ x = self.norm(x)
445
+ x = self.reduction(x)
446
+
447
+ return x
448
+
449
+
450
+ class BasicLayer(nn.Module):
451
+ """ A basic Swin Transformer layer for one stage.
452
+
453
+ Args:
454
+ dim (int): Number of feature channels
455
+ depth (int): Depth of this stage.
456
+ num_heads (int): Number of attention heads.
457
+ window_size (int): Local window size. Default: 7.
458
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.
459
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
460
+ qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
461
+ drop (float, optional): Dropout rate. Default: 0.0
462
+ attn_drop (float, optional): Attention dropout rate. Default: 0.0
463
+ drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
464
+ norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
465
+ downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
466
+ use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
467
+ """
468
+
469
+ def __init__(self,
470
+ dim,
471
+ depth,
472
+ num_heads,
473
+ window_size=7,
474
+ mlp_ratio=4.,
475
+ qkv_bias=True,
476
+ qk_scale=None,
477
+ drop=0.,
478
+ attn_drop=0.,
479
+ drop_path=0.,
480
+ norm_layer=nn.LayerNorm,
481
+ downsample=None,
482
+ use_checkpoint=False):
483
+ super().__init__()
484
+ self.window_size = window_size
485
+ self.shift_size = window_size // 2
486
+ self.depth = depth
487
+ self.use_checkpoint = use_checkpoint
488
+
489
+ # build blocks
490
+ self.blocks = nn.ModuleList([
491
+ SwinTransformerBlock(dim=dim,
492
+ num_heads=num_heads,
493
+ window_size=window_size,
494
+ shift_size=0 if
495
+ (i % 2 == 0) else window_size // 2,
496
+ mlp_ratio=mlp_ratio,
497
+ qkv_bias=qkv_bias,
498
+ qk_scale=qk_scale,
499
+ drop=drop,
500
+ attn_drop=attn_drop,
501
+ drop_path=drop_path[i] if isinstance(
502
+ drop_path, list) else drop_path,
503
+ norm_layer=norm_layer) for i in range(depth)
504
+ ])
505
+
506
+ # patch merging layer
507
+ if downsample is not None:
508
+ self.downsample = downsample(dim=dim, norm_layer=norm_layer)
509
+ else:
510
+ self.downsample = None
511
+
512
+ def forward(self, x, H, W):
513
+ """ Forward function.
514
+
515
+ Args:
516
+ x: Input feature, tensor size (B, H*W, C).
517
+ H, W: Spatial resolution of the input feature.
518
+ """
519
+
520
+ # calculate attention mask for SW-MSA
521
+ Hp = int(np.ceil(H / self.window_size)) * self.window_size
522
+ Wp = int(np.ceil(W / self.window_size)) * self.window_size
523
+ img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1
524
+ h_slices = (slice(0, -self.window_size),
525
+ slice(-self.window_size,
526
+ -self.shift_size), slice(-self.shift_size, None))
527
+ w_slices = (slice(0, -self.window_size),
528
+ slice(-self.window_size,
529
+ -self.shift_size), slice(-self.shift_size, None))
530
+ cnt = 0
531
+ for h in h_slices:
532
+ for w in w_slices:
533
+ img_mask[:, h, w, :] = cnt
534
+ cnt += 1
535
+
536
+ mask_windows = window_partition(
537
+ img_mask, self.window_size) # nW, window_size, window_size, 1
538
+ mask_windows = mask_windows.view(-1,
539
+ self.window_size * self.window_size)
540
+ attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
541
+ attn_mask = attn_mask.masked_fill(attn_mask != 0,
542
+ float(-100.0)).masked_fill(
543
+ attn_mask == 0, float(0.0))
544
+
545
+ for blk in self.blocks:
546
+ blk.H, blk.W = H, W
547
+ if self.use_checkpoint:
548
+ x = checkpoint.checkpoint(blk, x, attn_mask)
549
+ else:
550
+ x = blk(x, attn_mask)
551
+ if self.downsample is not None:
552
+ x_down = self.downsample(x, H, W)
553
+ Wh, Ww = (H + 1) // 2, (W + 1) // 2
554
+ return x, H, W, x_down, Wh, Ww
555
+ else:
556
+ return x, H, W, x, H, W
557
+
558
+
559
+ class PatchEmbed(nn.Module):
560
+ """ Image to Patch Embedding
561
+
562
+ Args:
563
+ patch_size (int): Patch token size. Default: 4.
564
+ in_chans (int): Number of input image channels. Default: 3.
565
+ embed_dim (int): Number of linear projection output channels. Default: 96.
566
+ norm_layer (nn.Module, optional): Normalization layer. Default: None
567
+ """
568
+
569
+ def __init__(self,
570
+ patch_size=4,
571
+ in_chans=3,
572
+ embed_dim=96,
573
+ norm_layer=None):
574
+ super().__init__()
575
+ patch_size = to_2tuple(patch_size)
576
+ self.patch_size = patch_size
577
+
578
+ self.in_chans = in_chans
579
+ self.embed_dim = embed_dim
580
+
581
+ self.proj = nn.Conv2d(in_chans,
582
+ embed_dim,
583
+ kernel_size=patch_size,
584
+ stride=patch_size)
585
+ if norm_layer is not None:
586
+ self.norm = norm_layer(embed_dim)
587
+ else:
588
+ self.norm = None
589
+
590
+ def forward(self, x):
591
+ """Forward function."""
592
+ # padding
593
+ _, _, H, W = x.size()
594
+ if W % self.patch_size[1] != 0:
595
+ x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1]))
596
+ if H % self.patch_size[0] != 0:
597
+ x = F.pad(x,
598
+ (0, 0, 0, self.patch_size[0] - H % self.patch_size[0]))
599
+
600
+ x = self.proj(x) # B C Wh Ww
601
+ if self.norm is not None:
602
+ Wh, Ww = x.size(2), x.size(3)
603
+ x = x.flatten(2).transpose(1, 2)
604
+ x = self.norm(x)
605
+ x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww)
606
+
607
+ return x
608
+
609
+
610
+ class SwinTransformer(nn.Module):
611
+ """ Swin Transformer backbone.
612
+ A PyTorch impl of: `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` -
613
+ https://arxiv.org/pdf/2103.14030
614
+
615
+ Args:
616
+ pretrain_img_size (int): Input image size for training the pretrained model,
617
+ used in absolute position embedding. Default 224.
618
+ patch_size (int | tuple(int)): Patch size. Default: 4.
619
+ in_chans (int): Number of input image channels. Default: 3.
620
+ embed_dim (int): Number of linear projection output channels. Default: 96.
621
+ depths (tuple[int]): Depths of each Swin Transformer stage.
622
+ num_heads (tuple[int]): Number of attention heads in each stage.
623
+ window_size (int): Window size. Default: 7.
624
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.
625
+ qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
626
+ qk_scale (float): Override default qk scale of head_dim ** -0.5 if set.
627
+ drop_rate (float): Dropout rate.
628
+ attn_drop_rate (float): Attention dropout rate. Default: 0.
629
+ drop_path_rate (float): Stochastic depth rate. Default: 0.2.
630
+ norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
631
+ ape (bool): If True, add absolute position embedding to the patch embedding. Default: False.
632
+ patch_norm (bool): If True, add normalization after patch embedding. Default: True.
633
+ out_indices (Sequence[int]): Output from which stages.
634
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
635
+ -1 means not freezing any parameters.
636
+ use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
637
+ """
638
+
639
+ def __init__(self,
640
+ pretrain_img_size=224,
641
+ patch_size=4,
642
+ in_chans=3,
643
+ embed_dim=96,
644
+ depths=[2, 2, 6, 2],
645
+ num_heads=[3, 6, 12, 24],
646
+ window_size=7,
647
+ mlp_ratio=4.,
648
+ qkv_bias=True,
649
+ qk_scale=None,
650
+ drop_rate=0.,
651
+ attn_drop_rate=0.,
652
+ drop_path_rate=0.2,
653
+ norm_layer=nn.LayerNorm,
654
+ ape=False,
655
+ patch_norm=True,
656
+ out_indices=(0, 1, 2, 3),
657
+ frozen_stages=-1,
658
+ use_checkpoint=False):
659
+ super().__init__()
660
+
661
+ self.pretrain_img_size = pretrain_img_size
662
+ self.num_layers = len(depths)
663
+ self.embed_dim = embed_dim
664
+ self.ape = ape
665
+ self.patch_norm = patch_norm
666
+ self.out_indices = out_indices
667
+ self.frozen_stages = frozen_stages
668
+
669
+ # split image into non-overlapping patches
670
+ self.patch_embed = PatchEmbed(
671
+ patch_size=patch_size,
672
+ in_chans=in_chans,
673
+ embed_dim=embed_dim,
674
+ norm_layer=norm_layer if self.patch_norm else None)
675
+
676
+ # absolute position embedding
677
+ if self.ape:
678
+ pretrain_img_size = to_2tuple(pretrain_img_size)
679
+ patch_size = to_2tuple(patch_size)
680
+ patches_resolution = [
681
+ pretrain_img_size[0] // patch_size[0],
682
+ pretrain_img_size[1] // patch_size[1]
683
+ ]
684
+
685
+ self.absolute_pos_embed = nn.Parameter(
686
+ torch.zeros(1, embed_dim, patches_resolution[0],
687
+ patches_resolution[1]))
688
+ trunc_normal_(self.absolute_pos_embed, std=.02)
689
+
690
+ self.pos_drop = nn.Dropout(p=drop_rate)
691
+
692
+ # stochastic depth
693
+ dpr = [
694
+ x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
695
+ ] # stochastic depth decay rule
696
+
697
+ # build layers
698
+ self.layers = nn.ModuleList()
699
+ for i_layer in range(self.num_layers):
700
+ layer = BasicLayer(
701
+ dim=int(embed_dim * 2**i_layer),
702
+ depth=depths[i_layer],
703
+ num_heads=num_heads[i_layer],
704
+ window_size=window_size,
705
+ mlp_ratio=mlp_ratio,
706
+ qkv_bias=qkv_bias,
707
+ qk_scale=qk_scale,
708
+ drop=drop_rate,
709
+ attn_drop=attn_drop_rate,
710
+ drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
711
+ norm_layer=norm_layer,
712
+ downsample=PatchMerging if
713
+ (i_layer < self.num_layers - 1) else None,
714
+ use_checkpoint=use_checkpoint)
715
+ self.layers.append(layer)
716
+
717
+ num_features = [int(embed_dim * 2**i) for i in range(self.num_layers)]
718
+ self.num_features = num_features
719
+
720
+ # add a norm layer for each output
721
+ for i_layer in out_indices:
722
+ layer = norm_layer(num_features[i_layer])
723
+ layer_name = f'norm{i_layer}'
724
+ self.add_module(layer_name, layer)
725
+
726
+ self._freeze_stages()
727
+
728
+ def _freeze_stages(self):
729
+ if self.frozen_stages >= 0:
730
+ self.patch_embed.eval()
731
+ for param in self.patch_embed.parameters():
732
+ param.requires_grad = False
733
+
734
+ if self.frozen_stages >= 1 and self.ape:
735
+ self.absolute_pos_embed.requires_grad = False
736
+
737
+ if self.frozen_stages >= 2:
738
+ self.pos_drop.eval()
739
+ for i in range(0, self.frozen_stages - 1):
740
+ m = self.layers[i]
741
+ m.eval()
742
+ for param in m.parameters():
743
+ param.requires_grad = False
744
+
745
+ def init_weights(self, pretrained=None):
746
+ """Initialize the weights in backbone.
747
+
748
+ Args:
749
+ pretrained (str, optional): Path to pre-trained weights.
750
+ Defaults to None.
751
+ """
752
+
753
+ def _init_weights(m):
754
+ if isinstance(m, nn.Linear):
755
+ trunc_normal_(m.weight, std=.02)
756
+ if isinstance(m, nn.Linear) and m.bias is not None:
757
+ nn.init.constant_(m.bias, 0)
758
+ elif isinstance(m, nn.LayerNorm):
759
+ nn.init.constant_(m.bias, 0)
760
+ nn.init.constant_(m.weight, 1.0)
761
+
762
+ if isinstance(pretrained, str):
763
+ self.apply(_init_weights)
764
+ # logger = get_root_logger()
765
+ load_checkpoint(self, pretrained, strict=False, logger=None)
766
+ elif pretrained is None:
767
+ self.apply(_init_weights)
768
+ else:
769
+ raise TypeError('pretrained must be a str or None')
770
+
771
+ def forward(self, x):
772
+ x = self.patch_embed(x)
773
+
774
+ Wh, Ww = x.size(2), x.size(3)
775
+ if self.ape:
776
+ # interpolate the position embedding to the corresponding size
777
+ absolute_pos_embed = F.interpolate(self.absolute_pos_embed,
778
+ size=(Wh, Ww),
779
+ mode='bicubic')
780
+ x = (x + absolute_pos_embed)  # B C Wh Ww
781
+
782
+ outs = [x.contiguous()]
783
+ x = x.flatten(2).transpose(1, 2)
784
+ x = self.pos_drop(x)
785
+ for i in range(self.num_layers):
786
+ layer = self.layers[i]
787
+ x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww)
788
+
789
+ if i in self.out_indices:
790
+ norm_layer = getattr(self, f'norm{i}')
791
+ x_out = norm_layer(x_out)
792
+
793
+ out = x_out.view(-1, H, W,
794
+ self.num_features[i]).permute(0, 3, 1,
795
+ 2).contiguous()
796
+ outs.append(out)
797
+
798
+ return tuple(outs)
799
+
800
+ def train(self, mode=True):
801
+ """Convert the model into training mode while keep layers freezed."""
802
+ super(SwinTransformer, self).train(mode)
803
+ self._freeze_stages()
804
+ #+end_src
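+
+ A rough shape check for the backbone factories above (a sketch, not tangled; it
+ assumes the tangled swin.py is importable). With a 384x384 input and patch_size=4,
+ SwinB should return five feature maps: the raw patch embedding plus one output per
+ stage, matching the channel counts consumed by MVANet later (128, 128, 256, 512, 1024):
+ #+begin_src python :shebang #!/usr/bin/python3 :results output
+ import torch
+
+ from swin import SwinB  # assumed unified module
+
+ model = SwinB(pretrained=False)  # checkpoint loading is commented out above anyway
+ model.eval()
+ with torch.no_grad():
+     feats = model(torch.randn(1, 3, 384, 384))
+ for f in feats:
+     print(tuple(f.shape))
+ # expected: (1, 128, 96, 96), (1, 128, 96, 96), (1, 256, 48, 48),
+ #           (1, 512, 24, 24), (1, 1024, 12, 12)
+ #+end_src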
805
+
806
+ * Main code
807
+
808
+ ** train.import.py
809
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.import.py
810
+ import os
811
+
812
+ os.environ["CUDA_VISIBLE_DEVICES"] = '0'
813
+ HOME_DIR = os.environ.get('HOME', '/root')
814
+ #+end_src
815
+
816
+ ** train.import.py
817
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.import.py
818
+ import sys
819
+
820
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
821
+ #+end_src
822
+
823
+ ** train.import.py
824
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.import.py
825
+ from datetime import datetime
826
+ import argparse
827
+ import numpy as np
828
+ import random
829
+ import math
830
+ #+end_src
831
+
832
+ ** train.import.py
833
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.import.py
834
+ import cv2
835
+ from PIL import Image
836
+ from PIL import ImageEnhance
837
+ #+end_src
838
+
839
+ ** train.import.py
840
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.import.py
841
+ from einops import rearrange
842
+ #+end_src
843
+
844
+ ** train.import.py
845
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.import.py
846
+ import torch
847
+ import torch.nn as nn
848
+ import torch.nn.functional as F
849
+ import torch.utils.data as data
850
+
851
+ from torch.autograd import Variable
852
+ from torch.backends import cudnn
853
+ from torch.cuda import amp
854
+ from torch.utils.tensorboard import SummaryWriter
855
+
856
+ from torchvision import transforms
857
+ #+end_src
858
+
859
+ ** train.import.py
860
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.import.py
861
+ from prodigyopt import Prodigy
862
+ #+end_src
863
+
864
+ ** train.import.py
865
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.import.py
866
+ # from model.MVANet import MVANet
867
+ from swin import SwinB
868
+ #+end_src
869
+
870
+ ** train.function.py
871
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.function.py
872
+ def get_activation_fn(activation):
873
+ """Return an activation function given a string"""
874
+ if activation == "relu":
875
+ return F.relu
876
+ if activation == "gelu":
877
+ return F.gelu
878
+ if activation == "glu":
879
+ return F.glu
880
+ raise RuntimeError(F"activation should be relu/gelu, not {activation}.")
881
+
882
+
883
+ def make_cbr(in_dim, out_dim):
884
+ return nn.Sequential(nn.Conv2d(in_dim, out_dim, kernel_size=3, padding=1),
885
+ nn.BatchNorm2d(out_dim), nn.PReLU())
886
+
887
+
888
+ def make_cbg(in_dim, out_dim):
889
+ return nn.Sequential(nn.Conv2d(in_dim, out_dim, kernel_size=3, padding=1),
890
+ nn.BatchNorm2d(out_dim), nn.GELU())
891
+
892
+
893
+ def rescale_to(x, scale_factor: float = 2, interpolation='nearest'):
894
+ return F.interpolate(x, scale_factor=scale_factor, mode=interpolation)
895
+
896
+
897
+ def resize_as(x, y, interpolation='bilinear'):
898
+ return F.interpolate(x, size=y.shape[-2:], mode=interpolation)
899
+
900
+
901
+ def image2patches(x):
902
+ """b c (hg h) (wg w) -> (hg wg b) c h w"""
903
+ x = rearrange(x, 'b c (hg h) (wg w) -> (hg wg b) c h w', hg=2, wg=2)
904
+ return x
905
+
906
+
907
+ def patches2image(x):
908
+ """(hg wg b) c h w -> b c (hg h) (wg w)"""
909
+ x = rearrange(x, '(hg wg b) c h w -> b c (hg h) (wg w)', hg=2, wg=2)
910
+ return x
911
+
912
+
913
+ def structure_loss(pred, mask):
914
+ weit = 1 + 5 * torch.abs(
915
+ F.avg_pool2d(mask, kernel_size=31, stride=1, padding=15) - mask)
916
+ wbce = F.binary_cross_entropy_with_logits(pred, mask, reduction='none')
917
+ wbce = (weit * wbce).sum(dim=(2, 3)) / weit.sum(dim=(2, 3))
918
+
919
+ pred = torch.sigmoid(pred)
920
+ inter = ((pred * mask) * weit).sum(dim=(2, 3))
921
+
922
+ union = ((pred + mask) * weit).sum(dim=(2, 3))
923
+ wiou = 1 - (inter + 1) / (union - inter + 1)
924
+
925
+ return (wbce + wiou).mean()
926
+
927
+
928
+ def clip_gradient(optimizer, grad_clip):
929
+ for group in optimizer.param_groups:
930
+ for param in group['params']:
931
+ if param.grad is not None:
932
+ param.grad.data.clamp_(-grad_clip, grad_clip)
933
+
934
+
935
+ def adjust_lr(optimizer, init_lr, epoch, decay_rate=0.1, decay_epoch=5):
936
+ decay = decay_rate**(epoch // decay_epoch)
937
+ for param_group in optimizer.param_groups:
938
+ param_group['lr'] *= decay
939
+
940
+
941
+ def truncated_normal_(tensor, mean=0, std=1):
942
+ size = tensor.shape
943
+ tmp = tensor.new_empty(size + (4, )).normal_()
944
+ valid = (tmp < 2) & (tmp > -2)
945
+ ind = valid.max(-1, keepdim=True)[1]
946
+ tensor.data.copy_(tmp.gather(-1, ind).squeeze(-1))
947
+ tensor.data.mul_(std).add_(mean)
948
+
949
+
950
+ def init_weights(m):
951
+ if type(m) == nn.Conv2d or type(m) == nn.ConvTranspose2d:
952
+ nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
953
+ #nn.init.normal_(m.weight, std=0.001)
954
+ #nn.init.normal_(m.bias, std=0.001)
955
+ truncated_normal_(m.bias, mean=0, std=0.001)
956
+
957
+
958
+ def init_weights_orthogonal_normal(m):
959
+ if type(m) == nn.Conv2d or type(m) == nn.ConvTranspose2d:
960
+ nn.init.orthogonal_(m.weight)
961
+ truncated_normal_(m.bias, mean=0, std=0.001)
962
+ #nn.init.normal_(m.bias, std=0.001)
963
+
964
+
965
+ def l2_regularisation(m):
966
+ l2_reg = None
967
+
968
+ for W in m.parameters():
969
+ if l2_reg is None:
970
+ l2_reg = W.norm(2)
971
+ else:
972
+ l2_reg = l2_reg + W.norm(2)
973
+ return l2_reg
974
+
975
+
976
+ def check_mkdir(dir_name):
977
+ if not os.path.isdir(dir_name):
978
+ os.makedirs(dir_name)
979
+
980
+
981
+ # several data augmentation strategies
982
+ def cv_random_flip(img, label):
983
+ flip_flag = random.randint(0, 1)
984
+ flip_flag2 = random.randint(0, 1)
985
+
986
+ # left right flip
987
+ if flip_flag == 1:
988
+ img = img.transpose(Image.FLIP_LEFT_RIGHT)
989
+ label = label.transpose(Image.FLIP_LEFT_RIGHT)
990
+
991
+ # top bottom flip
992
+ if flip_flag2 == 1:
993
+ img = img.transpose(Image.FLIP_TOP_BOTTOM)
994
+ label = label.transpose(Image.FLIP_TOP_BOTTOM)
995
+
996
+ return img, label
997
+
998
+
999
+ def random_crop_full(image, X, Y, TX, TY):
1000
+ image_width = image.size[0]
1001
+ image_height = image.size[1]
1002
+ final_width = image_width * TX
1003
+ final_height = image_height * TY
1004
+
1005
+ start_x = (1.0 - TX) * X * image_width
1006
+ start_y = (1.0 - TY) * Y * image_height
1007
+
1008
+ random_region = (start_x, start_y, start_x + final_width,
1009
+ start_y + final_height)
1010
+
1011
+ return image.crop(random_region)
1012
+
1013
+
1014
+ def random_crop(image, X, Y, T):
1015
+ image_width = image.size[0]
1016
+ image_height = image.size[1]
1017
+ final_width = image_width * T
1018
+ final_height = image_height * T
1019
+
1020
+ start_x = (1.0 - T) * X * image_width
1021
+ start_y = (1.0 - T) * Y * image_height
1022
+
1023
+ random_region = (start_x, start_y, start_x + final_width,
1024
+ start_y + final_height)
1025
+
1026
+ return image.crop(random_region)
1027
+
1028
+
1029
+ def garment_color_jitter(image, mask):
1030
+ image = np.array(image)
1031
+ mask = np.array(mask)
1032
+ mask = (mask > 127).astype(dtype=np.uint8)
1033
+ image = cv2.cvtColor(src=image, code=cv2.COLOR_RGB2HSV_FULL)
1034
+ image[:, :, 0] += mask * np.random.randint(0, 255)
1035
+ image = cv2.cvtColor(src=image, code=cv2.COLOR_HSV2RGB_FULL)
1036
+ image = Image.fromarray(image)
1037
+ return image
1038
+
1039
+
1040
+ def garment_color_jitter_rotate(image, mask, rotate_index=0, shift_amount=0):
1041
+ image = np.array(image)
1042
+ mask = np.array(mask)
1043
+
1044
+ if rotate_index == 1:
1045
+
1046
+ image = cv2.rotate(src=image, rotateCode=cv2.ROTATE_90_CLOCKWISE)
1047
+ mask = cv2.rotate(src=mask, rotateCode=cv2.ROTATE_90_CLOCKWISE)
1048
+
1049
+ elif rotate_index == 2:
1050
+
1051
+ image = cv2.rotate(src=image, rotateCode=cv2.ROTATE_180)
1052
+ mask = cv2.rotate(src=mask, rotateCode=cv2.ROTATE_180)
1053
+
1054
+ elif rotate_index == 3:
1055
+
1056
+ image = cv2.rotate(src=image,
1057
+ rotateCode=cv2.ROTATE_90_COUNTERCLOCKWISE)
1058
+
1059
+ mask = cv2.rotate(src=mask, rotateCode=cv2.ROTATE_90_COUNTERCLOCKWISE)
1060
+
1061
+ image = cv2.cvtColor(src=image,
1062
+ code=cv2.COLOR_RGB2HSV_FULL).astype(dtype=np.int32)
1063
+ # image[:, :, 0] += mask_tmp * shift_amount
1064
+ image[:, :, 0] += shift_amount
1065
+ image[:, :, 0] %= 255
1066
+ image = cv2.cvtColor(src=image.astype(np.uint8),
1067
+ code=cv2.COLOR_HSV2RGB_FULL)
1068
+
1069
+ image = Image.fromarray(image)
1070
+ mask = Image.fromarray(mask)
1071
+
1072
+ return image, mask
1073
+
1074
+
1075
+ def randomCrop_Both(image, label):
1076
+
1077
+ image, label = garment_color_jitter_rotate(
1078
+ image=image,
1079
+ mask=label,
1080
+ rotate_index=np.random.randint(0, 4),
1081
+ shift_amount=np.random.randint(-4, +4),
1082
+ )
1083
+
1084
+ TX = (np.random.rand() * 0.6) + 0.4
1085
+ TY = (np.random.rand() * 0.6) + 0.4
1086
+ X = np.random.rand()
1087
+ Y = np.random.rand()
1088
+ return random_crop_full(image, X, Y, TX,
1089
+ TY), random_crop_full(label, X, Y, TX, TY)
1090
+
1091
+
1092
+ def randomCrop_Old(image, label):
1093
+
1094
+ # image, label = garment_color_jitter_rotate(
1095
+ # image=image,
1096
+ # mask=label,
1097
+ # rotate_index=np.random.randint(0, 4),
1098
+ # shift_amount=np.random.randint(0, 256))
1099
+
1100
+ # image, label = garment_color_jitter_rotate(
1101
+ # image=image,
1102
+ # mask=label,
1103
+ # rotate_index=np.random.randint(0, 4),
1104
+ # shift_amount=0,
1105
+ # )
1106
+
1107
+ T = (np.random.rand() * 0.6) + 0.4
1108
+ X = np.random.rand()
1109
+ Y = np.random.rand()
1110
+ return random_crop(image, X, Y, T), random_crop(label, X, Y, T)
1111
+
1112
+
1113
+ def randomCrop(image, label):
1114
+ return randomCrop_Both(image, label)
1115
+
1116
+
1117
+ def randomCrop_original(image, label):
1118
+ image_width = image.size[0]
1119
+ image_height = image.size[1]
1120
+ border = min(image_width, image_height) // 2
1121
+
1122
+ crop_win_width = np.random.randint(image_width - border, image_width)
1123
+ crop_win_height = np.random.randint(image_height - border, image_height)
1124
+
1125
+ random_region = ((image_width - crop_win_width) >> 1,
1126
+ (image_height - crop_win_height) >> 1,
1127
+ (image_width + crop_win_width) >> 1,
1128
+ (image_height + crop_win_height) >> 1)
1129
+
1130
+ return image.crop(random_region), label.crop(random_region)
1131
+
1132
+
1133
+ def randomRotation(image, label):
1134
+ mode = Image.BICUBIC
1135
+ if random.random() > 0.8:
1136
+ random_angle = np.random.randint(-15, 15)
1137
+ image = image.rotate(random_angle, mode)
1138
+ label = label.rotate(random_angle, mode)
1139
+ return image, label
1140
+
1141
+
1142
+ def colorEnhance(image):
1143
+ bright_intensity = random.randint(5, 15) / 10.0
1144
+ image = ImageEnhance.Brightness(image).enhance(bright_intensity)
1145
+ contrast_intensity = random.randint(5, 15) / 10.0
1146
+ image = ImageEnhance.Contrast(image).enhance(contrast_intensity)
1147
+ color_intensity = random.randint(0, 20) / 10.0
1148
+ image = ImageEnhance.Color(image).enhance(color_intensity)
1149
+ sharp_intensity = random.randint(0, 30) / 10.0
1150
+ image = ImageEnhance.Sharpness(image).enhance(sharp_intensity)
1151
+ return image
1152
+
1153
+
1154
+ def randomGaussian(image, mean=0.1, sigma=0.35):
1155
+
1156
+ def gaussianNoisy(im, mean=mean, sigma=sigma):
1157
+ for _i in range(len(im)):
1158
+ im[_i] += random.gauss(mean, sigma)
1159
+ return im
1160
+
1161
+ img = np.asarray(image, dtype=np.float32)  # work in float so adding noise does not overflow uint8
1162
+ width, height = img.shape
1163
+ img = gaussianNoisy(img[:].flatten(), mean, sigma)
1164
+ img = img.reshape([width, height])
1165
+ return Image.fromarray(np.uint8(np.clip(img, 0, 255)))
1166
+
1167
+
1168
+ def randomPeper(img):
1169
+ img = np.array(img)
1170
+ noiseNum = int(0.0015 * img.shape[0] * img.shape[1])
1171
+ for i in range(noiseNum):
1172
+
1173
+ randX = random.randint(0, img.shape[0] - 1)
1174
+
1175
+ randY = random.randint(0, img.shape[1] - 1)
1176
+
1177
+ if random.randint(0, 1) == 0:
1178
+
1179
+ img[randX, randY] = 0
1180
+
1181
+ else:
1182
+
1183
+ img[randX, randY] = 255
1184
+ return Image.fromarray(img)
1185
+
1186
+
1187
+ # dataloader for training
1188
+ def get_loader(image_root,
1189
+ gt_root,
1190
+ batchsize,
1191
+ trainsize,
1192
+ shuffle=True,
1193
+ num_workers=12,
1194
+ pin_memory=False):
1195
+ print('DEBUG 6')
1196
+ dataset = DISDataset(image_root, gt_root, trainsize)
1197
+ print('DEBUG 7')
1198
+ data_loader = data.DataLoader(dataset=dataset,
1199
+ batch_size=batchsize,
1200
+ shuffle=shuffle,
1201
+ num_workers=num_workers,
1202
+ pin_memory=pin_memory)
1203
+ print('DEBUG 8')
1204
+ return data_loader
1205
+ #+end_src
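+
+ A small untangled sketch of the helpers above, under the assumption that they are
+ already defined in the session: image2patches splits every image into a 2x2 grid of
+ crops (so the batch grows 4x), patches2image inverts it exactly, and structure_loss
+ combines pixel-weighted BCE with a weighted IoU term on raw logits versus a binary mask:
+ #+begin_src python :shebang #!/usr/bin/python3 :results output
+ import torch
+
+ x = torch.randn(2, 3, 64, 64)
+ p = image2patches(x)                       # (2*2*2, 3, 32, 32)
+ assert p.shape == (8, 3, 32, 32)
+ assert torch.equal(patches2image(p), x)    # exact round trip
+
+ pred = torch.randn(2, 1, 64, 64)           # raw logits
+ mask = (torch.rand(2, 1, 64, 64) > 0.5).float()
+ print(structure_loss(pred, mask).item())   # single scalar loss
+ #+end_src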
1206
+
1207
+ ** train.class.py
1208
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.class.py
1209
+ class AvgMeter(object):
1210
+
1211
+ def __init__(self, num=40):
1212
+ self.num = num
1213
+ self.reset()
1214
+
1215
+ def reset(self):
1216
+ self.val = 0
1217
+ self.avg = 0
1218
+ self.sum = 0
1219
+ self.count = 0
1220
+ self.losses = []
1221
+
1222
+ def update(self, val, n=1):
1223
+ self.val = val
1224
+ self.sum += val * n
1225
+ self.count += n
1226
+ self.avg = self.sum / self.count
1227
+ self.losses.append(val)
1228
+
1229
+ def show(self):
1230
+ a = len(self.losses)
1231
+ b = np.maximum(a - self.num, 0)
1232
+ c = self.losses[b:]
1233
+ #print(c)
1234
+ #d = torch.mean(torch.stack(c))
1235
+ #print(d)
1236
+ return torch.mean(torch.stack(c))
1237
+
1238
+
1239
+ class Running_Avg(object):
1240
+
1241
+ def __init__(self, weight=0.999):
1242
+ self.weight = weight
1243
+ self.reset()
1244
+
1245
+ def reset(self):
1246
+ self.n = 0
1247
+ self.val = 0
1248
+
1249
+ def update(self, val, n=1):
1250
+ self.val = (self.weight * self.val) + ((1 - self.weight) * val)
1251
+ self.n = (self.weight * self.n) + ((1 - self.weight) * n)
1252
+
1253
+ def show(self):
1254
+ if self.n == 0:
1255
+ return 0
1256
+ else:
1257
+ return self.val / self.n
1258
+ #+end_src
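+
+ Usage sketch for the two meters (not tangled; assumes the classes above are in
+ scope). Note that AvgMeter.show() calls torch.stack on the stored values, so it
+ expects 0-d tensors, while Running_Avg works with plain floats:
+ #+begin_src python :shebang #!/usr/bin/python3 :results output
+ import torch
+
+ loss_record = AvgMeter(num=40)        # windowed mean over the last 40 updates
+ loss_ema = Running_Avg(weight=0.999)  # exponential moving average
+
+ for step in range(100):
+     loss = torch.rand(())             # stand-in for a detached training loss
+     loss_record.update(loss, n=1)
+     loss_ema.update(float(loss), n=1)
+
+ print(loss_record.show().item(), loss_ema.show())
+ #+end_src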
1259
+
1260
+ ** Main training dataset
1261
+
1262
+ *** COMMENT Original
1263
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.class.py
1264
+ # dataset for training
1265
+ # The current loader is not using the normalized depth maps for training and test. If you use the normalized depth maps
1266
+ # (e.g., 0 represents background and 1 represents foreground), the performance will be further improved.
1267
+ class DISDataset(data.Dataset):
1268
+
1269
+ def __init__(self, image_root, gt_root, trainsize):
1270
+ self.trainsize = trainsize
1271
+ self.images = [
1272
+ image_root + f for f in os.listdir(image_root)
1273
+ if f.endswith('.jpg') or f.endswith('.png') or f.endswith('.tif')
1274
+ ]
1275
+ self.gts = [
1276
+ gt_root + f for f in os.listdir(gt_root)
1277
+ if f.endswith('.jpg') or f.endswith('.png') or f.endswith('.tif')
1278
+ ]
1279
+ self.images = sorted(self.images)
1280
+ self.gts = sorted(self.gts)
1281
+ self.filter_files()
1282
+ self.size = len(self.images)
1283
+ self.img_transform = transforms.Compose([
1284
+ transforms.Resize((self.trainsize, self.trainsize)),
1285
+ transforms.ToTensor(),
1286
+ transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
1287
+ ])
1288
+ self.gt_transform = transforms.Compose([
1289
+ transforms.Resize((self.trainsize, self.trainsize)),
1290
+ transforms.ToTensor()
1291
+ ])
1292
+
1293
+ def __getitem__(self, index):
1294
+ image = self.rgb_loader(self.images[index])
1295
+ gt = self.binary_loader(self.gts[index])
1296
+ image, gt = cv_random_flip(image, gt)
1297
+ image, gt = randomCrop(image, gt)
1298
+ image, gt = randomRotation(image, gt)
1299
+ image = colorEnhance(image)
1300
+ image = self.img_transform(image)
1301
+ gt = self.gt_transform(gt)
1302
+
1303
+ return image, gt
1304
+
1305
+ def filter_files(self):
1306
+ assert len(self.images) == len(self.gts) and len(self.gts) == len(
1307
+ self.images)
1308
+ images = []
1309
+ gts = []
1310
+ for img_path, gt_path in zip(self.images, self.gts):
1311
+ img = Image.open(img_path)
1312
+ gt = Image.open(gt_path)
1313
+ if img.size == gt.size:
1314
+ images.append(img_path)
1315
+ gts.append(gt_path)
1316
+ self.images = images
1317
+ self.gts = gts
1318
+
1319
+ def rgb_loader(self, path):
1320
+ with open(path, 'rb') as f:
1321
+ img = Image.open(f)
1322
+ return img.convert('RGB')
1323
+
1324
+ def binary_loader(self, path):
1325
+ with open(path, 'rb') as f:
1326
+ img = Image.open(f)
1327
+ return img.convert('L')
1328
+
1329
+ def resize(self, img, gt):
1330
+ assert img.size == gt.size
1331
+ w, h = img.size
1332
+ if h < self.trainsize or w < self.trainsize:
1333
+ h = max(h, self.trainsize)
1334
+ w = max(w, self.trainsize)
1335
+ return img.resize((w, h), Image.BILINEAR), gt.resize((w, h),
1336
+ Image.NEAREST)
1337
+ else:
1338
+ return img, gt
1339
+
1340
+ def __len__(self):
1341
+ return self.size
1342
+ #+end_src
1343
+
1344
+ *** Changed
1345
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.class.py
1346
+ # dataset for training
1347
+ # The current loader is not using the normalized depth maps for training and test. If you use the normalized depth maps
1348
+ # (e.g., 0 represents background and 1 represents foreground), the performance will be further improved.
1349
+ class DISDataset(data.Dataset):
1350
+
1351
+ def __init__(self, image_root, gt_root, trainsize):
1352
+ self.trainsize = trainsize
1353
+ end_pattern = '_segm.png'
1354
+ files = list(f for f in os.listdir(gt_root) if f.endswith(end_pattern))
1355
+ files.sort()
1356
+
1357
+ self.gts = list(gt_root + f for f in files)
1358
+
1359
+ self.images = list(image_root + f[0:-len(end_pattern)] + '.jpg'
1360
+ for f in files)
1361
+
1362
+ self.size = len(self.images)
1363
+
1364
+ self.img_transform = transforms.Compose([
1365
+ transforms.Resize((self.trainsize, self.trainsize)),
1366
+ transforms.ToTensor(),
1367
+ transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
1368
+ ])
1369
+
1370
+ self.gt_transform = transforms.Compose([
1371
+ transforms.Resize((self.trainsize, self.trainsize)),
1372
+ transforms.ToTensor()
1373
+ ])
1374
+
1375
+ def __getitem__(self, index):
1376
+ image = self.rgb_loader(self.images[index])
1377
+ gt = self.binary_loader(self.gts[index])
1378
+ image, gt = cv_random_flip(image, gt)
1379
+ image, gt = randomCrop(image, gt)
1380
+ image, gt = randomRotation(image, gt)
1381
+ image = colorEnhance(image)
1382
+ image = self.img_transform(image)
1383
+ gt = self.gt_transform(gt)
1384
+
1385
+ return image, gt
1386
+
1387
+ def filter_files(self):
1388
+ assert len(self.images) == len(self.gts) and len(self.gts) == len(
1389
+ self.images)
1390
+ images = []
1391
+ gts = []
1392
+ for img_path, gt_path in zip(self.images, self.gts):
1393
+ img = Image.open(img_path)
1394
+ gt = Image.open(gt_path)
1395
+ if img.size == gt.size:
1396
+ images.append(img_path)
1397
+ gts.append(gt_path)
1398
+ self.images = images
1399
+ self.gts = gts
1400
+
1401
+ def rgb_loader(self, path):
1402
+ with open(path, 'rb') as f:
1403
+ img = Image.open(f)
1404
+ return img.convert('RGB')
1405
+
1406
+ def binary_loader(self, path):
1407
+ with open(path, 'rb') as f:
1408
+ img = Image.open(f)
1409
+ return img.convert('L')
1410
+
1411
+ def resize(self, img, gt):
1412
+ assert img.size == gt.size
1413
+ w, h = img.size
1414
+ if h < self.trainsize or w < self.trainsize:
1415
+ h = max(h, self.trainsize)
1416
+ w = max(w, self.trainsize)
1417
+ return img.resize((w, h), Image.BILINEAR), gt.resize((w, h),
1418
+ Image.NEAREST)
1419
+ else:
1420
+ return img, gt
1421
+
1422
+ def __len__(self):
1423
+ return self.size
1424
+ #+end_src
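+
+ A hypothetical setup for the changed loader above (paths and sizes are placeholders,
+ not part of the original code; it assumes DISDataset and get_loader from the tangled
+ train.* files are in scope). Ground-truth masks are expected in gt_root as
+ <name>_segm.png with the matching image in image_root as <name>.jpg, and both roots
+ must end with a path separator because the dataset builds paths by string
+ concatenation rather than os.path.join:
+ #+begin_src python :shebang #!/usr/bin/python3 :results output
+ image_root = './data/images/'  # hypothetical layout, trailing '/' required
+ gt_root = './data/masks/'      # hypothetical layout, trailing '/' required
+
+ loader = get_loader(image_root, gt_root, batchsize=2, trainsize=1024,
+                     shuffle=True, num_workers=0)  # keep 0 workers for a quick smoke test
+ images, gts = next(iter(loader))
+ print(images.shape, gts.shape)  # (2, 3, 1024, 1024) and (2, 1, 1024, 1024)
+ #+end_src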
1425
+
1426
+ ** train.class.py
1427
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.class.py
1428
+ # test dataset and loader
1429
+ class test_dataset:
1430
+
1431
+ def __init__(self, image_root, depth_root, testsize):
1432
+ self.testsize = testsize
1433
+ self.images = [
1434
+ image_root + f for f in os.listdir(image_root)
1435
+ if f.endswith('.jpg')
1436
+ ]
1437
+ self.depths = [
1438
+ depth_root + f for f in os.listdir(depth_root)
1439
+ if f.endswith('.bmp') or f.endswith('.png')
1440
+ ]
1441
+ self.images = sorted(self.images)
1442
+ self.depths = sorted(self.depths)
1443
+ self.transform = transforms.Compose([
1444
+ transforms.Resize((self.testsize, self.testsize)),
1445
+ transforms.ToTensor(),
1446
+ transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
1447
+ ])
1448
+ # self.gt_transform = transforms.Compose([
1449
+ # transforms.Resize((self.trainsize, self.trainsize)),
1450
+ # transforms.ToTensor()])
1451
+ self.depths_transform = transforms.Compose([
1452
+ transforms.Resize((self.testsize, self.testsize)),
1453
+ transforms.ToTensor()
1454
+ ])
1455
+ self.size = len(self.images)
1456
+ self.index = 0
1457
+
1458
+ def load_data(self):
1459
+ image = self.rgb_loader(self.images[self.index])
1460
+ HH = image.size[0]
1461
+ WW = image.size[1]
1462
+ image = self.transform(image).unsqueeze(0)
1463
+ depth = self.rgb_loader(self.depths[self.index])
1464
+ depth = self.depths_transform(depth).unsqueeze(0)
1465
+
1466
+ name = self.images[self.index].split('/')[-1]
1467
+ # image_for_post=self.rgb_loader(self.images[self.index])
1468
+ # image_for_post=image_for_post.resize(gt.size)
1469
+ if name.endswith('.jpg'):
1470
+ name = name.split('.jpg')[0] + '.png'
1471
+ self.index += 1
1472
+ self.index = self.index % self.size
1473
+ return image, depth, HH, WW, name
1474
+
1475
+ def rgb_loader(self, path):
1476
+ with open(path, 'rb') as f:
1477
+ img = Image.open(f)
1478
+ return img.convert('RGB')
1479
+
1480
+ def binary_loader(self, path):
1481
+ with open(path, 'rb') as f:
1482
+ img = Image.open(f)
1483
+ return img.convert('L')
1484
+
1485
+ def __len__(self):
1486
+ return self.size
1487
+
1488
+
1489
+ class PositionEmbeddingSine:
1490
+
1491
+ def __init__(self,
1492
+ num_pos_feats=64,
1493
+ temperature=10000,
1494
+ normalize=False,
1495
+ scale=None):
1496
+
1497
+ super().__init__()
1498
+
1499
+ self.num_pos_feats = num_pos_feats
1500
+ self.temperature = temperature
1501
+ self.normalize = normalize
1502
+ if scale is not None and normalize is False:
1503
+ raise ValueError("normalize should be True if scale is passed")
1504
+ if scale is None:
1505
+ scale = 2 * math.pi
1506
+ self.scale = scale
1507
+ self.dim_t = torch.arange(0,
1508
+ self.num_pos_feats,
1509
+ dtype=torch.float32,
1510
+ device='cuda')
1511
+
1512
+ def __call__(self, b, h, w):
1513
+ mask = torch.zeros([b, h, w], dtype=torch.bool, device='cuda')
1514
+ assert mask is not None
1515
+ not_mask = ~mask
1516
+ y_embed = not_mask.cumsum(dim=1, dtype=torch.float32)
1517
+ x_embed = not_mask.cumsum(dim=2, dtype=torch.float32)
1518
+ if self.normalize:
1519
+ eps = 1e-6
1520
+ y_embed = ((y_embed - 0.5) / (y_embed[:, -1:, :] + eps) *
1521
+ self.scale).cuda()
1522
+ x_embed = ((x_embed - 0.5) / (x_embed[:, :, -1:] + eps) *
1523
+ self.scale).cuda()
1524
+
1525
+ dim_t = self.temperature**(2 * (self.dim_t // 2) / self.num_pos_feats)
1526
+
1527
+ pos_x = x_embed[:, :, :, None] / dim_t
1528
+ pos_y = y_embed[:, :, :, None] / dim_t
1529
+ pos_x = torch.stack(
1530
+ (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()),
1531
+ dim=4).flatten(3)
1532
+ pos_y = torch.stack(
1533
+ (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()),
1534
+ dim=4).flatten(3)
1535
+ return torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
1536
+
1537
+
1538
+ class MCLM(nn.Module):
1539
+
1540
+ def __init__(self, d_model, num_heads, pool_ratios=[1, 4, 8]):
1541
+ super(MCLM, self).__init__()
1542
+ self.attention = nn.ModuleList([
1543
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1544
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1545
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1546
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1547
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1)
1548
+ ])
1549
+
1550
+ self.linear1 = nn.Linear(d_model, d_model * 2)
1551
+ self.linear2 = nn.Linear(d_model * 2, d_model)
1552
+ self.linear3 = nn.Linear(d_model, d_model * 2)
1553
+ self.linear4 = nn.Linear(d_model * 2, d_model)
1554
+ self.norm1 = nn.LayerNorm(d_model)
1555
+ self.norm2 = nn.LayerNorm(d_model)
1556
+ self.dropout = nn.Dropout(0.1)
1557
+ self.dropout1 = nn.Dropout(0.1)
1558
+ self.dropout2 = nn.Dropout(0.1)
1559
+ self.activation = get_activation_fn('relu')
1560
+ self.pool_ratios = pool_ratios
1561
+ self.p_poses = []
1562
+ self.g_pos = None
1563
+ self.positional_encoding = PositionEmbeddingSine(
1564
+ num_pos_feats=d_model // 2, normalize=True)
1565
+
1566
+ def forward(self, l, g):
1567
+ """
1568
+ l: 4,c,h,w
1569
+ g: 1,c,h,w
1570
+ """
1571
+ b, c, h, w = l.size()
1572
+ # 4,c,h,w -> 1,c,2h,2w
1573
+ concated_locs = rearrange(l,
1574
+ '(hg wg b) c h w -> b c (hg h) (wg w)',
1575
+ hg=2,
1576
+ wg=2)
1577
+
1578
+ pools = []
1579
+ for pool_ratio in self.pool_ratios:
1580
+ # b,c,h,w
1581
+ tgt_hw = (round(h / pool_ratio), round(w / pool_ratio))
1582
+ pool = F.adaptive_avg_pool2d(concated_locs, tgt_hw)
1583
+ pools.append(rearrange(pool, 'b c h w -> (h w) b c'))
1584
+ if self.g_pos is None:
1585
+ pos_emb = self.positional_encoding(pool.shape[0],
1586
+ pool.shape[2],
1587
+ pool.shape[3])
1588
+ pos_emb = rearrange(pos_emb, 'b c h w -> (h w) b c')
1589
+ self.p_poses.append(pos_emb)
1590
+ pools = torch.cat(pools, 0)
1591
+ if self.g_pos is None:
1592
+ self.p_poses = torch.cat(self.p_poses, dim=0)
1593
+ pos_emb = self.positional_encoding(g.shape[0], g.shape[2],
1594
+ g.shape[3])
1595
+ self.g_pos = rearrange(pos_emb, 'b c h w -> (h w) b c')
1596
+
1597
+ # attention between glb (q) & multi-scale pooled concated-locs (k,v)
1598
+ g_hw_b_c = rearrange(g, 'b c h w -> (h w) b c')
1599
+ g_hw_b_c = g_hw_b_c + self.dropout1(self.attention[0](
1600
+ g_hw_b_c + self.g_pos, pools + self.p_poses, pools)[0])
1601
+ g_hw_b_c = self.norm1(g_hw_b_c)
1602
+ g_hw_b_c = g_hw_b_c + self.dropout2(
1603
+ self.linear2(
1604
+ self.dropout(self.activation(self.linear1(g_hw_b_c)).clone())))
1605
+ g_hw_b_c = self.norm2(g_hw_b_c)
1606
+
1607
+ # attention between original locs (q) & refreshed glb (k,v)
1608
+ l_hw_b_c = rearrange(l, "b c h w -> (h w) b c")
1609
+ _g_hw_b_c = rearrange(g_hw_b_c, '(h w) b c -> h w b c', h=h, w=w)
1610
+ _g_hw_b_c = rearrange(_g_hw_b_c,
1611
+ "(ng h) (nw w) b c -> (h w) (ng nw b) c",
1612
+ ng=2,
1613
+ nw=2)
1614
+ outputs_re = []
1615
+ for i, (_l, _g) in enumerate(
1616
+ zip(l_hw_b_c.chunk(4, dim=1), _g_hw_b_c.chunk(4, dim=1))):
1617
+ outputs_re.append(self.attention[i + 1](_l, _g,
1618
+ _g)[0]) # (h w) 1 c
1619
+ outputs_re = torch.cat(outputs_re, 1) # (h w) 4 c
1620
+
1621
+ l_hw_b_c = l_hw_b_c + self.dropout1(outputs_re)
1622
+ l_hw_b_c = self.norm1(l_hw_b_c)
1623
+ l_hw_b_c = l_hw_b_c + self.dropout2(
1624
+ self.linear4(
1625
+ self.dropout(self.activation(self.linear3(l_hw_b_c)).clone())))
1626
+ l_hw_b_c = self.norm2(l_hw_b_c)
1627
+
1628
+ l = torch.cat((l_hw_b_c, g_hw_b_c), 1) # hw,b(5),c
1629
+ return rearrange(l, "(h w) b c -> b c h w", h=h, w=w) ## (5,c,h*w)
1630
+
1631
+
1632
+ class inf_MCLM(nn.Module):
1633
+
1634
+ def __init__(self, d_model, num_heads, pool_ratios=[1, 4, 8]):
1635
+ super(inf_MCLM, self).__init__()
1636
+ self.attention = nn.ModuleList([
1637
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1638
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1639
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1640
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1641
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1)
1642
+ ])
1643
+
1644
+ self.linear1 = nn.Linear(d_model, d_model * 2)
1645
+ self.linear2 = nn.Linear(d_model * 2, d_model)
1646
+ self.linear3 = nn.Linear(d_model, d_model * 2)
1647
+ self.linear4 = nn.Linear(d_model * 2, d_model)
1648
+ self.norm1 = nn.LayerNorm(d_model)
1649
+ self.norm2 = nn.LayerNorm(d_model)
1650
+ self.dropout = nn.Dropout(0.1)
1651
+ self.dropout1 = nn.Dropout(0.1)
1652
+ self.dropout2 = nn.Dropout(0.1)
1653
+ self.activation = get_activation_fn('relu')
1654
+ self.pool_ratios = pool_ratios
1655
+ self.p_poses = []
1656
+ self.g_pos = None
1657
+ self.positional_encoding = PositionEmbeddingSine(
1658
+ num_pos_feats=d_model // 2, normalize=True)
1659
+
1660
+ def forward(self, l, g):
1661
+ """
1662
+ l: 4,c,h,w
1663
+ g: 1,c,h,w
1664
+ """
1665
+ b, c, h, w = l.size()
1666
+ # 4,c,h,w -> 1,c,2h,2w
1667
+ concated_locs = rearrange(l,
1668
+ '(hg wg b) c h w -> b c (hg h) (wg w)',
1669
+ hg=2,
1670
+ wg=2)
1671
+ self.p_poses = []
1672
+ pools = []
1673
+ for pool_ratio in self.pool_ratios:
1674
+ # b,c,h,w
1675
+ tgt_hw = (round(h / pool_ratio), round(w / pool_ratio))
1676
+ pool = F.adaptive_avg_pool2d(concated_locs, tgt_hw)
1677
+ pools.append(rearrange(pool, 'b c h w -> (h w) b c'))
1678
+ # if self.g_pos is None:
1679
+ pos_emb = self.positional_encoding(pool.shape[0], pool.shape[2],
1680
+ pool.shape[3])
1681
+ pos_emb = rearrange(pos_emb, 'b c h w -> (h w) b c')
1682
+ self.p_poses.append(pos_emb)
1683
+ pools = torch.cat(pools, 0)
1684
+ # if self.g_pos is None:
1685
+ self.p_poses = torch.cat(self.p_poses, dim=0)
1686
+ pos_emb = self.positional_encoding(g.shape[0], g.shape[2], g.shape[3])
1687
+ self.g_pos = rearrange(pos_emb, 'b c h w -> (h w) b c')
1688
+
1689
+ # attention between glb (q) & multi-scale pooled concated-locs (k,v)
1690
+ g_hw_b_c = rearrange(g, 'b c h w -> (h w) b c')
1691
+ g_hw_b_c = g_hw_b_c + self.dropout1(self.attention[0](
1692
+ g_hw_b_c + self.g_pos, pools + self.p_poses, pools)[0])
1693
+ g_hw_b_c = self.norm1(g_hw_b_c)
1694
+ g_hw_b_c = g_hw_b_c + self.dropout2(
1695
+ self.linear2(
1696
+ self.dropout(self.activation(self.linear1(g_hw_b_c)).clone())))
1697
+ g_hw_b_c = self.norm2(g_hw_b_c)
1698
+
1699
+ # attention between original locs (q) & refreshed glb (k,v)
1700
+ l_hw_b_c = rearrange(l, "b c h w -> (h w) b c")
1701
+ _g_hw_b_c = rearrange(g_hw_b_c, '(h w) b c -> h w b c', h=h, w=w)
1702
+ _g_hw_b_c = rearrange(_g_hw_b_c,
1703
+ "(ng h) (nw w) b c -> (h w) (ng nw b) c",
1704
+ ng=2,
1705
+ nw=2)
1706
+ outputs_re = []
1707
+ for i, (_l, _g) in enumerate(
1708
+ zip(l_hw_b_c.chunk(4, dim=1), _g_hw_b_c.chunk(4, dim=1))):
1709
+ outputs_re.append(self.attention[i + 1](_l, _g,
1710
+ _g)[0]) # (h w) 1 c
1711
+ outputs_re = torch.cat(outputs_re, 1) # (h w) 4 c
1712
+
1713
+ l_hw_b_c = l_hw_b_c + self.dropout1(outputs_re)
1714
+ l_hw_b_c = self.norm1(l_hw_b_c)
1715
+ l_hw_b_c = l_hw_b_c + self.dropout2(
1716
+ self.linear4(
1717
+ self.dropout(self.activation(self.linear3(l_hw_b_c)).clone())))
1718
+ l_hw_b_c = self.norm2(l_hw_b_c)
1719
+
1720
+ l = torch.cat((l_hw_b_c, g_hw_b_c), 1) # hw,b(5),c
1721
+ return rearrange(l, "(h w) b c -> b c h w", h=h, w=w) ## (5,c,h*w)
1722
+
1723
+
1724
+ class MCRM(nn.Module):
1725
+
1726
+ def __init__(self, d_model, num_heads, pool_ratios=[4, 8, 16], h=None):
1727
+ super(MCRM, self).__init__()
1728
+ self.attention = nn.ModuleList([
1729
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1730
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1731
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
1732
+ nn.MultiheadAttention(d_model, num_heads, dropout=0.1)
1733
+ ])
1734
+
1735
+ self.linear3 = nn.Linear(d_model, d_model * 2)
1736
+ self.linear4 = nn.Linear(d_model * 2, d_model)
1737
+ self.norm1 = nn.LayerNorm(d_model)
1738
+ self.norm2 = nn.LayerNorm(d_model)
1739
+ self.dropout = nn.Dropout(0.1)
1740
+ self.dropout1 = nn.Dropout(0.1)
1741
+ self.dropout2 = nn.Dropout(0.1)
1742
+ self.sigmoid = nn.Sigmoid()
1743
+ self.activation = get_activation_fn('relu')
1744
+ self.sal_conv = nn.Conv2d(d_model, 1, 1)
1745
+ self.pool_ratios = pool_ratios
1746
+ self.positional_encoding = PositionEmbeddingSine(
1747
+ num_pos_feats=d_model // 2, normalize=True)
1748
+
1749
+ def forward(self, x):
1750
+ b, c, h, w = x.size()
1751
+ loc, glb = x.split([4, 1], dim=0) # 4,c,h,w; 1,c,h,w
1752
+ # b(4),c,h,w
1753
+ patched_glb = rearrange(glb,
1754
+ 'b c (hg h) (wg w) -> (hg wg b) c h w',
1755
+ hg=2,
1756
+ wg=2)
1757
+
1758
+ # generate token attention map
1759
+ token_attention_map = self.sigmoid(self.sal_conv(glb))
1760
+ token_attention_map = F.interpolate(token_attention_map,
1761
+ size=patches2image(loc).shape[-2:],
1762
+ mode='nearest')
1763
+ loc = loc * rearrange(token_attention_map,
1764
+ 'b c (hg h) (wg w) -> (hg wg b) c h w',
1765
+ hg=2,
1766
+ wg=2)
1767
+ pools = []
1768
+ for pool_ratio in self.pool_ratios:
1769
+ tgt_hw = (round(h / pool_ratio), round(w / pool_ratio))
1770
+ pool = F.adaptive_avg_pool2d(patched_glb, tgt_hw)
1771
+ pools.append(rearrange(pool,
1772
+ 'nl c h w -> nl c (h w)')) # nl(4),c,hw
1773
+ # nl(4),c,nphw -> nl(4),nphw,1,c
1774
+ pools = rearrange(torch.cat(pools, 2), "nl c nphw -> nl nphw 1 c")
1775
+ loc_ = rearrange(loc, 'nl c h w -> nl (h w) 1 c')
1776
+ outputs = []
1777
+ for i, q in enumerate(
1778
+ loc_.unbind(dim=0)): # traverse all local patches
1779
+ # np*hw,1,c
1780
+ v = pools[i]
1781
+ k = v
1782
+ outputs.append(self.attention[i](q, k, v)[0])
1783
+ outputs = torch.cat(outputs, 1)
1784
+ src = loc.view(4, c, -1).permute(2, 0, 1) + self.dropout1(outputs)
1785
+ src = self.norm1(src)
1786
+ src = src + self.dropout2(
1787
+ self.linear4(
1788
+ self.dropout(self.activation(self.linear3(src)).clone())))
1789
+ src = self.norm2(src)
1790
+
1791
+ src = src.permute(1, 2, 0).reshape(4, c, h, w)  # refreshed loc
1792
+ glb = glb + F.interpolate(patches2image(src),
1793
+ size=glb.shape[-2:],
1794
+ mode='nearest')  # refreshed glb
1795
+ return torch.cat((src, glb), 0), token_attention_map
1796
+
1797
+
1798
+ class inf_MCRM(nn.Module):
+
+     def __init__(self, d_model, num_heads, pool_ratios=[4, 8, 16], h=None):
+         super(inf_MCRM, self).__init__()
+         self.attention = nn.ModuleList([
+             nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
+             nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
+             nn.MultiheadAttention(d_model, num_heads, dropout=0.1),
+             nn.MultiheadAttention(d_model, num_heads, dropout=0.1)
+         ])
+
+         self.linear3 = nn.Linear(d_model, d_model * 2)
+         self.linear4 = nn.Linear(d_model * 2, d_model)
+         self.norm1 = nn.LayerNorm(d_model)
+         self.norm2 = nn.LayerNorm(d_model)
+         self.dropout = nn.Dropout(0.1)
+         self.dropout1 = nn.Dropout(0.1)
+         self.dropout2 = nn.Dropout(0.1)
+         self.sigmoid = nn.Sigmoid()
+         self.activation = get_activation_fn('relu')
+         self.sal_conv = nn.Conv2d(d_model, 1, 1)
+         self.pool_ratios = pool_ratios
+         self.positional_encoding = PositionEmbeddingSine(
+             num_pos_feats=d_model // 2, normalize=True)
+
+     def forward(self, x):
+         b, c, h, w = x.size()
+         loc, glb = x.split([4, 1], dim=0)  # 4,c,h,w; 1,c,h,w
+         # b(4),c,h,w
+         patched_glb = rearrange(glb,
+                                 'b c (hg h) (wg w) -> (hg wg b) c h w',
+                                 hg=2,
+                                 wg=2)
+
+         # generate token attention map
+         token_attention_map = self.sigmoid(self.sal_conv(glb))
+         token_attention_map = F.interpolate(token_attention_map,
+                                             size=patches2image(loc).shape[-2:],
+                                             mode='nearest')
+         loc = loc * rearrange(token_attention_map,
+                               'b c (hg h) (wg w) -> (hg wg b) c h w',
+                               hg=2,
+                               wg=2)
+         pools = []
+         for pool_ratio in self.pool_ratios:
+             tgt_hw = (round(h / pool_ratio), round(w / pool_ratio))
+             pool = F.adaptive_avg_pool2d(patched_glb, tgt_hw)
+             pools.append(rearrange(pool,
+                                    'nl c h w -> nl c (h w)'))  # nl(4),c,hw
+         # nl(4),c,nphw -> nl(4),nphw,1,c
+         pools = rearrange(torch.cat(pools, 2), "nl c nphw -> nl nphw 1 c")
+         loc_ = rearrange(loc, 'nl c h w -> nl (h w) 1 c')
+         outputs = []
+         for i, q in enumerate(
+                 loc_.unbind(dim=0)):  # traverse all local patches
+             # np*hw,1,c
+             v = pools[i]
+             k = v
+             outputs.append(self.attention[i](q, k, v)[0])
+         outputs = torch.cat(outputs, 1)
+         src = loc.view(4, c, -1).permute(2, 0, 1) + self.dropout1(outputs)
+         src = self.norm1(src)
+         src = src + self.dropout2(
+             self.linear4(
+                 self.dropout(self.activation(self.linear3(src)).clone())))
+         src = self.norm2(src)
+
+         src = src.permute(1, 2, 0).reshape(4, c, h, w)  # refreshed loc
+         glb = glb + F.interpolate(patches2image(src),
+                                   size=glb.shape[-2:],
+                                   mode='nearest')  # refreshed glb
+         return torch.cat((src, glb), 0)
+
+
+ # model for single-scale training
+ class MVANet(nn.Module):
+
+     def __init__(self):
+         super().__init__()
+         self.backbone = SwinB(pretrained=True)
+         emb_dim = 128
+         self.sideout5 = nn.Sequential(
+             nn.Conv2d(emb_dim, 1, kernel_size=3, padding=1))
+         self.sideout4 = nn.Sequential(
+             nn.Conv2d(emb_dim, 1, kernel_size=3, padding=1))
+         self.sideout3 = nn.Sequential(
+             nn.Conv2d(emb_dim, 1, kernel_size=3, padding=1))
+         self.sideout2 = nn.Sequential(
+             nn.Conv2d(emb_dim, 1, kernel_size=3, padding=1))
+         self.sideout1 = nn.Sequential(
+             nn.Conv2d(emb_dim, 1, kernel_size=3, padding=1))
+
+         self.output5 = make_cbr(1024, emb_dim)
+         self.output4 = make_cbr(512, emb_dim)
+         self.output3 = make_cbr(256, emb_dim)
+         self.output2 = make_cbr(128, emb_dim)
+         self.output1 = make_cbr(128, emb_dim)
+
+         self.multifieldcrossatt = MCLM(emb_dim, 1, [1, 4, 8])
+         self.conv1 = make_cbr(emb_dim, emb_dim)
+         self.conv2 = make_cbr(emb_dim, emb_dim)
+         self.conv3 = make_cbr(emb_dim, emb_dim)
+         self.conv4 = make_cbr(emb_dim, emb_dim)
+         self.dec_blk1 = MCRM(emb_dim, 1, [2, 4, 8])
+         self.dec_blk2 = MCRM(emb_dim, 1, [2, 4, 8])
+         self.dec_blk3 = MCRM(emb_dim, 1, [2, 4, 8])
+         self.dec_blk4 = MCRM(emb_dim, 1, [2, 4, 8])
+
+         self.insmask_head = nn.Sequential(
+             nn.Conv2d(emb_dim, 384, kernel_size=3, padding=1),
+             nn.BatchNorm2d(384), nn.PReLU(),
+             nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.BatchNorm2d(384),
+             nn.PReLU(), nn.Conv2d(384, emb_dim, kernel_size=3, padding=1))
+
+         self.shallow = nn.Sequential(
+             nn.Conv2d(3, emb_dim, kernel_size=3, padding=1))
+         self.upsample1 = make_cbg(emb_dim, emb_dim)
+         self.upsample2 = make_cbg(emb_dim, emb_dim)
+         self.output = nn.Sequential(
+             nn.Conv2d(emb_dim, 1, kernel_size=3, padding=1))
+
+         for m in self.modules():
+             if isinstance(m, nn.ReLU) or isinstance(m, nn.Dropout):
+                 m.inplace = True
+
+     def forward(self, x):
+         shallow = self.shallow(x)
+         glb = rescale_to(x, scale_factor=0.5, interpolation='bilinear')
+         loc = image2patches(x)
+         input = torch.cat((loc, glb), dim=0)
+         feature = self.backbone(input)
+         e5 = self.output5(feature[4])  # (5,128,16,16)
+         e4 = self.output4(feature[3])  # (5,128,32,32)
+         e3 = self.output3(feature[2])  # (5,128,64,64)
+         e2 = self.output2(feature[1])  # (5,128,128,128)
+         e1 = self.output1(feature[0])  # (5,128,128,128)
+         loc_e5, glb_e5 = e5.split([4, 1], dim=0)
+         e5 = self.multifieldcrossatt(loc_e5, glb_e5)  # (4,128,16,16)
+
+         e4, tokenattmap4 = self.dec_blk4(e4 + resize_as(e5, e4))
+         e4 = self.conv4(e4)
+         e3, tokenattmap3 = self.dec_blk3(e3 + resize_as(e4, e3))
+         e3 = self.conv3(e3)
+         e2, tokenattmap2 = self.dec_blk2(e2 + resize_as(e3, e2))
+         e2 = self.conv2(e2)
+         e1, tokenattmap1 = self.dec_blk1(e1 + resize_as(e2, e1))
+         e1 = self.conv1(e1)
+         loc_e1, glb_e1 = e1.split([4, 1], dim=0)
+         output1_cat = patches2image(loc_e1)  # (1,128,256,256)
+         # add glb feat in
+         output1_cat = output1_cat + resize_as(glb_e1, output1_cat)
+         # merge
+         final_output = self.insmask_head(output1_cat)  # (1,128,256,256)
+         # shallow feature merge
+         final_output = final_output + resize_as(shallow, final_output)
+         final_output = self.upsample1(rescale_to(final_output))
+         final_output = rescale_to(final_output +
+                                   resize_as(shallow, final_output))
+         final_output = self.upsample2(final_output)
+         final_output = self.output(final_output)
+         ####
+         sideout5 = self.sideout5(e5).cuda()
+         sideout4 = self.sideout4(e4)
+         sideout3 = self.sideout3(e3)
+         sideout2 = self.sideout2(e2)
+         sideout1 = self.sideout1(e1)
+         #######glb_sideouts ######
+         glb5 = self.sideout5(glb_e5)
+         glb4 = sideout4[-1, :, :, :].unsqueeze(0)
+         glb3 = sideout3[-1, :, :, :].unsqueeze(0)
+         glb2 = sideout2[-1, :, :, :].unsqueeze(0)
+         glb1 = sideout1[-1, :, :, :].unsqueeze(0)
+         ####### concat 4 to 1 #######
+         sideout1 = patches2image(sideout1[:-1]).cuda()
+         sideout2 = patches2image(
+             sideout2[:-1]).cuda()  ####(5,c,h,w) -> (1 c 2h,2w)
+         sideout3 = patches2image(sideout3[:-1]).cuda()
+         sideout4 = patches2image(sideout4[:-1]).cuda()
+         sideout5 = patches2image(sideout5[:-1]).cuda()
+         if self.training:
+             return sideout5, sideout4, sideout3, sideout2, sideout1, final_output, glb5, glb4, glb3, glb2, glb1, tokenattmap4, tokenattmap3, tokenattmap2, tokenattmap1
+         else:
+             return final_output
+
+
+ # model for multi-scale testing
+ class inf_MVANet(nn.Module):
+
+     def __init__(self):
+         super().__init__()
+         self.backbone = SwinB(pretrained=True)
+
+         emb_dim = 128
+         self.output5 = make_cbr(1024, emb_dim)
+         self.output4 = make_cbr(512, emb_dim)
+         self.output3 = make_cbr(256, emb_dim)
+         self.output2 = make_cbr(128, emb_dim)
+         self.output1 = make_cbr(128, emb_dim)
+
+         self.multifieldcrossatt = inf_MCLM(emb_dim, 1, [1, 4, 8])
+         self.conv1 = make_cbr(emb_dim, emb_dim)
+         self.conv2 = make_cbr(emb_dim, emb_dim)
+         self.conv3 = make_cbr(emb_dim, emb_dim)
+         self.conv4 = make_cbr(emb_dim, emb_dim)
+         self.dec_blk1 = inf_MCRM(emb_dim, 1, [2, 4, 8])
+         self.dec_blk2 = inf_MCRM(emb_dim, 1, [2, 4, 8])
+         self.dec_blk3 = inf_MCRM(emb_dim, 1, [2, 4, 8])
+         self.dec_blk4 = inf_MCRM(emb_dim, 1, [2, 4, 8])
+
+         self.insmask_head = nn.Sequential(
+             nn.Conv2d(emb_dim, 384, kernel_size=3, padding=1),
+             nn.BatchNorm2d(384), nn.PReLU(),
+             nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.BatchNorm2d(384),
+             nn.PReLU(), nn.Conv2d(384, emb_dim, kernel_size=3, padding=1))
+
+         self.shallow = nn.Sequential(
+             nn.Conv2d(3, emb_dim, kernel_size=3, padding=1))
+         self.upsample1 = make_cbg(emb_dim, emb_dim)
+         self.upsample2 = make_cbg(emb_dim, emb_dim)
+         self.output = nn.Sequential(
+             nn.Conv2d(emb_dim, 1, kernel_size=3, padding=1))
+
+         for m in self.modules():
+             if isinstance(m, nn.ReLU) or isinstance(m, nn.Dropout):
+                 m.inplace = True
+
+     def forward(self, x):
+         shallow = self.shallow(x)
+         glb = rescale_to(x, scale_factor=0.5, interpolation='bilinear')
+         loc = image2patches(x)
+         input = torch.cat((loc, glb), dim=0)
+         feature = self.backbone(input)
+         e5 = self.output5(feature[4])
+         e4 = self.output4(feature[3])
+         e3 = self.output3(feature[2])
+         e2 = self.output2(feature[1])
+         e1 = self.output1(feature[0])
+         loc_e5, glb_e5 = e5.split([4, 1], dim=0)
+         e5_cat = self.multifieldcrossatt(loc_e5, glb_e5)
+
+         e4 = self.conv4(self.dec_blk4(e4 + resize_as(e5_cat, e4)))
+         e3 = self.conv3(self.dec_blk3(e3 + resize_as(e4, e3)))
+         e2 = self.conv2(self.dec_blk2(e2 + resize_as(e3, e2)))
+         e1 = self.conv1(self.dec_blk1(e1 + resize_as(e2, e1)))
+         loc_e1, glb_e1 = e1.split([4, 1], dim=0)
+         # after decoder, concat loc features to a whole one, and merge
+         output1_cat = patches2image(loc_e1)
+         # add glb feat in
+         output1_cat = output1_cat + resize_as(glb_e1, output1_cat)
+         # merge
+         final_output = self.insmask_head(output1_cat)
+         # shallow feature merge
+         final_output = final_output + resize_as(shallow, final_output)
+         final_output = self.upsample1(rescale_to(final_output))
+         final_output = rescale_to(final_output +
+                                   resize_as(shallow, final_output))
+         final_output = self.upsample2(final_output)
+         final_output = self.output(final_output)
+         return final_output
+ #+end_src
+
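+ Both forward passes above rely on the same 2x2 tiling: image2patches splits the full-resolution input into four local crops that are stacked with the half-resolution global view, and patches2image merges the crops back into one map. A minimal sketch of that round trip (illustrative only, assuming einops and torch are installed; it is not tangled into any script):
+ #+begin_src python :results output
+ import torch
+ from einops import rearrange
+
+ x = torch.randn(1, 3, 1024, 1024)  # one training-size image
+ # split into a 2x2 grid of local patches, as image2patches does above
+ patches = rearrange(x, 'b c (hg h) (wg w) -> (hg wg b) c h w', hg=2, wg=2)
+ print(patches.shape)  # torch.Size([4, 3, 512, 512])
+ # merge back, as patches2image does above; the round trip is lossless
+ merged = rearrange(patches, '(hg wg b) c h w -> b c (hg h) (wg w)', hg=2, wg=2)
+ print(torch.equal(x, merged))  # True
+ #+end_src
+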
+ ** train.execute.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.execute.py
+ writer = SummaryWriter()
+
+ cudnn.benchmark = True
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--epoch', type=int, default=80, help='epoch number')
+ parser.add_argument('--lr_gen', type=float, default=1e-5, help='learning rate')
+ parser.add_argument('--batchsize',
+                     type=int,
+                     default=1,
+                     help='training batch size')
+ parser.add_argument('--trainsize',
+                     type=int,
+                     default=1024,
+                     help='training dataset size')
+ parser.add_argument('--decay_rate',
+                     type=float,
+                     default=0.9,
+                     help='decay rate of learning rate')
+ parser.add_argument('--decay_epoch',
+                     type=int,
+                     default=80,
+                     help='every n epochs decay learning rate')
+
+ opt = parser.parse_args()
+ print('Generator Learning Rate: {}'.format(opt.lr_gen))
+ # build models
+ if hasattr(torch.cuda, 'empty_cache'):
+     torch.cuda.empty_cache()
+ generator = MVANet()
+ generator.cuda()
+ print('DEBUG 3')
+
+ pretrained_dict = torch.load(
+     HOME_DIR +
+     '/GITHUB/aravind-h-v/dreambooth_experiments/cloth_segmentation/MVANet_Train/pretrained_model/Model_80.pth',
+     map_location='cuda')
+
+ model_dict = generator.state_dict()
+ pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
+ model_dict.update(pretrained_dict)
+ generator.load_state_dict(model_dict)
+
+ generator_params = generator.parameters()
+ # generator_optimizer = torch.optim.Adam(generator_params, opt.lr_gen)
+ generator_optimizer = Prodigy(generator_params, lr=1., weight_decay=0.01)
+
+ print('DEBUG 4')
+
+ image_root = './data/image/'
+ gt_root = './data/mask/'
+
+ train_loader = get_loader(image_root,
+                           gt_root,
+                           batchsize=opt.batchsize,
+                           trainsize=opt.trainsize)
+
+ print('DEBUG 5')
+
+ total_step = len(train_loader)
+ to_pil = transforms.ToPILImage()
+ ## define loss
+ print('DEBUG 2')
+
+ CE = torch.nn.BCELoss()
+ mse_loss = torch.nn.MSELoss(reduction='mean')
+ size_rates = [1]
+ criterion = nn.BCEWithLogitsLoss().cuda()
+ criterion_mae = nn.L1Loss().cuda()
+ criterion_mse = nn.MSELoss().cuda()
+ use_fp16 = True
+ scaler = amp.GradScaler(enabled=use_fp16)
+ print('DEBUG 1')
+
+ for epoch in range(1, opt.epoch + 1):
+     torch.cuda.empty_cache()
+     generator.train()
+     # loss_record = AvgMeter()
+     loss_record = Running_Avg()
+     print('Generator Learning Rate: {}'.format(
+         generator_optimizer.param_groups[0]['lr']))
+     for i, pack in enumerate(train_loader, start=1):
+         torch.cuda.empty_cache()
+         for rate in size_rates:
+             torch.cuda.empty_cache()
+             generator_optimizer.zero_grad()
+             images, gts = pack
+             images = images.cuda()
+             gts = gts.cuda()
+             trainsize = int(round(opt.trainsize * rate / 32) * 32)
+             if rate != 1:
+                 images = F.interpolate(images,
+                                        size=(trainsize, trainsize),
+                                        mode='bilinear',
+                                        align_corners=True)
+                 gts = F.interpolate(gts,
+                                     size=(trainsize, trainsize),
+                                     mode='bilinear',
+                                     align_corners=True)
+
+             b, c, h, w = gts.size()
+             target_1 = F.interpolate(gts, size=h // 4, mode='nearest')
+             target_2 = F.interpolate(gts, size=h // 8, mode='nearest').cuda()
+             target_3 = F.interpolate(gts, size=h // 16, mode='nearest').cuda()
+             target_4 = F.interpolate(gts, size=h // 32, mode='nearest').cuda()
+             target_5 = F.interpolate(gts, size=h // 64, mode='nearest').cuda()
+
+             with amp.autocast(enabled=use_fp16):
+                 sideout5, sideout4, sideout3, sideout2, sideout1, final, glb5, glb4, glb3, glb2, glb1, tokenattmap4, tokenattmap3, tokenattmap2, tokenattmap1 = generator(
+                     images)
+                 loss1 = structure_loss(sideout5, target_4)
+                 loss2 = structure_loss(sideout4, target_3)
+                 loss3 = structure_loss(sideout3, target_2)
+                 loss4 = structure_loss(sideout2, target_1)
+                 loss5 = structure_loss(sideout1, target_1)
+                 loss6 = structure_loss(final, gts)
+                 loss7 = structure_loss(glb5, target_5)
+                 loss8 = structure_loss(glb4, target_4)
+                 loss9 = structure_loss(glb3, target_3)
+                 loss10 = structure_loss(glb2, target_2)
+                 loss11 = structure_loss(glb1, target_2)
+                 loss12 = structure_loss(tokenattmap4, target_3)
+                 loss13 = structure_loss(tokenattmap3, target_2)
+                 loss14 = structure_loss(tokenattmap2, target_1)
+                 loss15 = structure_loss(tokenattmap1, target_1)
+                 loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6 + 0.3 * (
+                     loss7 + loss8 + loss9 + loss10 +
+                     loss11) + 0.3 * (loss12 + loss13 + loss14 + loss15)
+                 Loss_loc = loss1 + loss2 + loss3 + loss4 + loss5 + loss6
+                 Loss_glb = loss7 + loss8 + loss9 + loss10 + loss11
+                 Loss_map = loss12 + loss13 + loss14 + loss15
+                 writer.add_scalar('loss', loss.item(),
+                                   epoch * len(train_loader) + i)
+
+             generator_optimizer.zero_grad()
+             scaler.scale(loss).backward()
+             scaler.step(generator_optimizer)
+             scaler.update()
+
+             if rate == 1:
+                 loss_record.update(loss.data, opt.batchsize)
+
+         if i % 10 == 0 or i == total_step:
+             print(
+                 '{} Epoch [{:03d}/{:03d}], Step [{:04d}/{:04d}], gen Loss: {:.4f}'
+                 .format(datetime.now(), epoch, opt.epoch, i, total_step,
+                         loss_record.show()))
+
+         if i % 8000 == 0 or i == total_step:
+             save_path = './saved_model/'
+             if not os.path.exists(save_path):
+                 os.mkdir(save_path)
+             torch.save(
+                 generator.state_dict(),
+                 save_path + 'Model' + '_%d' % epoch + '_%d' % i + '.pth')
+
+     # adjust_lr(generator_optimizer, opt.lr_gen, epoch, opt.decay_rate,
+     #           opt.decay_epoch)
+     # save checkpoints every 20 epochs
+     # if epoch % 20 == 0:
+     if True:
+
+         save_path = './saved_model/'
+         if not os.path.exists(save_path):
+             os.mkdir(save_path)
+
+         save_path = './saved_model/MVANet/'
+         if not os.path.exists(save_path):
+             os.mkdir(save_path)
+
+         torch.save(generator.state_dict(),
+                    save_path + 'Model' + '_%d' % epoch + '.pth')
+ #+end_src
+
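+ Before launching a long run it can be worth a quick smoke test of the wiring above. A rough sketch (not part of the tangled scripts; it assumes a CUDA device is available and that the SwinB backbone weights can be loaded):
+ #+begin_src python :results output
+ import torch
+
+ # build the single-scale training model defined above and run one dummy image
+ model = MVANet().cuda().eval()
+ with torch.no_grad():
+     pred = model(torch.randn(1, 3, 1024, 1024).cuda())
+ print(pred.shape)  # expect a single-channel prediction map
+ #+end_src
+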
+ * SAMPLE
+
+ ** train
+
+ *** train.import.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.import.py
+ #+end_src
+
+ *** train.function.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.function.py
+ #+end_src
+
+ *** train.class.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.class.py
+ #+end_src
+
+ *** train.execute.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./train.execute.py
+ #+end_src
+
+ ** swin
+
+ *** swin.import.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./swin.import.py
+ #+end_src
+
+ *** swin.function.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./swin.function.py
+ #+end_src
+
+ *** swin.class.py
+ #+begin_src python :shebang #!/usr/bin/python3 :results output :tangle ./swin.class.py
+ #+end_src
+
+ * UNIFY
+ #+begin_src sh :shebang #!/bin/sh :results output :tangle ./train.unify.sh
+ . "${HOME}/dbnew.sh"
+
+ echo '#!/usr/bin/python3' > './train.py'
+
+ cat \
+     './train.import.py' \
+     './train.function.py' \
+     './train.class.py' \
+     './train.execute.py' \
+     | expand | yapf3 \
+     | grep -v '^#!/usr/bin/python3$' \
+     >> './train.py' \
+     ;
+
+ echo '#!/usr/bin/python3' > './swin.py'
+
+ cat \
+     './swin.import.py' \
+     './swin.function.py' \
+     './swin.class.py' \
+     | expand | yapf3 \
+     | grep -v '^#!/usr/bin/python3$' \
+     >> './swin.py' \
+     ;
+
+ rm -vf -- \
+     './swin.class.py' \
+     './swin.function.py' \
+     './swin.import.py' \
+     './train.class.py' \
+     './train.execute.py' \
+     './train.function.py' \
+     './train.import.py' \
+     './train.unify.sh' \
+     ;
+ #+end_src
+
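+ Tangling this file and then running ./train.unify.sh therefore leaves only the concatenated, yapf3-formatted ./train.py and ./swin.py; the intermediate *.import.py, *.function.py, *.class.py and *.execute.py fragments (and the unify script itself) are removed afterwards.
+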
+ * Run
+ #+begin_src sh :shebang #!/bin/sh :results output :tangle ./run.sh
+ . "${HOME}/dbnew.sh"
+
+ cd "$('dirname' '--' "${0}")"
+
+ pip3 install -r './requirements.txt'
+
+ python3 ./train.py --batchsize 4
+ #+end_src
+
+ * WORK SPACE
+
+ ** ELISP
+ #+begin_src elisp
+ (save-buffer)
+ (org-babel-tangle)
+ (shell-command "./train.unify.sh")
+ #+end_src
+
+ #+RESULTS:
+ : 0
+
+ ** SHELL
+ #+begin_src sh :shebang #!/bin/sh :results output
+ realpath .
+ cd /home/asd/GITHUB/aravind-h-v/dreambooth_experiments/cloth_segmentation/MVANet_Train
+ #+end_src