Realcat committed
Commit: 7dc6568
Parent(s): 614259e

add: xoftr

This view is limited to 50 files because it contains too many changes. See raw diff.
- README.md +1 -0
- hloc/extractors/darkfeat.py +0 -1
- hloc/extractors/rord.py +0 -1
- hloc/extractors/sfd2.py +1 -3
- hloc/match_dense.py +19 -0
- hloc/match_features.py +7 -2
- hloc/matchers/aspanformer.py +0 -1
- hloc/matchers/dkm.py +0 -1
- hloc/matchers/gim.py +5 -3
- hloc/matchers/imp.py +1 -3
- hloc/matchers/mickey.py +0 -2
- hloc/matchers/omniglue.py +0 -1
- hloc/matchers/xoftr.py +93 -0
- third_party/XoFTR/LICENSE +202 -0
- third_party/XoFTR/README.md +115 -0
- third_party/XoFTR/configs/data/__init__.py +0 -0
- third_party/XoFTR/configs/data/base.py +35 -0
- third_party/XoFTR/configs/data/megadepth_trainval_840.py +22 -0
- third_party/XoFTR/configs/data/megadepth_vistir_trainval_640.py +23 -0
- third_party/XoFTR/configs/data/pretrain.py +8 -0
- third_party/XoFTR/configs/xoftr/outdoor/visible_thermal.py +17 -0
- third_party/XoFTR/configs/xoftr/pretrain/pretrain.py +12 -0
- third_party/XoFTR/data/megadepth/index/.gitignore +4 -0
- third_party/XoFTR/data/megadepth/test/.gitignore +4 -0
- third_party/XoFTR/data/megadepth/train/.gitignore +4 -0
- third_party/XoFTR/docs/TRAINING.md +63 -0
- third_party/XoFTR/environment.yaml +14 -0
- third_party/XoFTR/notebooks/xoftr_demo.ipynb +0 -0
- third_party/XoFTR/notebooks/xoftr_demo_batch.ipynb +0 -0
- third_party/XoFTR/pretrain.py +125 -0
- third_party/XoFTR/requirements.txt +19 -0
- third_party/XoFTR/scripts/reproduce_train/pretrain.sh +31 -0
- third_party/XoFTR/scripts/reproduce_train/visible_thermal.sh +35 -0
- third_party/XoFTR/src/__init__.py +0 -0
- third_party/XoFTR/src/config/default.py +203 -0
- third_party/XoFTR/src/datasets/megadepth.py +143 -0
- third_party/XoFTR/src/datasets/pretrain_dataset.py +156 -0
- third_party/XoFTR/src/datasets/sampler.py +77 -0
- third_party/XoFTR/src/datasets/scannet.py +114 -0
- third_party/XoFTR/src/datasets/vistir.py +109 -0
- third_party/XoFTR/src/lightning/data.py +346 -0
- third_party/XoFTR/src/lightning/data_pretrain.py +125 -0
- third_party/XoFTR/src/lightning/lightning_xoftr.py +334 -0
- third_party/XoFTR/src/lightning/lightning_xoftr_pretrain.py +171 -0
- third_party/XoFTR/src/losses/xoftr_loss.py +170 -0
- third_party/XoFTR/src/losses/xoftr_loss_pretrain.py +37 -0
- third_party/XoFTR/src/optimizers/__init__.py +42 -0
- third_party/XoFTR/src/utils/augment.py +113 -0
- third_party/XoFTR/src/utils/comm.py +265 -0
- third_party/XoFTR/src/utils/data_io.py +144 -0
README.md
CHANGED
@@ -34,6 +34,7 @@ Here is a demo of the tool:
 ![demo](assets/demo.gif)
 
 The tool currently supports various popular image matching algorithms, namely:
+- [x] [XoFTR](https://github.com/OnderT/XoFTR), CVPR 2024
 - [x] [EfficientLoFTR](https://github.com/zju3dv/EfficientLoFTR), CVPR 2024
 - [x] [MASt3R](https://github.com/naver/mast3r), CVPR 2024
 - [x] [DUSt3R](https://github.com/naver/dust3r), CVPR 2024
hloc/extractors/darkfeat.py
CHANGED
@@ -1,4 +1,3 @@
-import subprocess
 import sys
 from pathlib import Path
 
hloc/extractors/rord.py
CHANGED
@@ -1,4 +1,3 @@
-import subprocess
 import sys
 from pathlib import Path
 
hloc/extractors/sfd2.py
CHANGED
@@ -26,9 +26,7 @@ class SFD2(BaseModel):
         )
         model_path = self._download_model(
             repo_id=MODEL_REPO_ID,
-            filename="{}/{}".format(
-                "pram", self.conf["model_name"]
-            ),
+            filename="{}/{}".format("pram", self.conf["model_name"]),
         )
         self.net = load_sfd2(weight_path=model_path).eval()
 
hloc/match_dense.py
CHANGED
@@ -63,6 +63,25 @@ confs = {
         "max_error": 1,  # max error for assigned keypoints (in px)
         "cell_size": 1,  # size of quantization patch (max 1 kp/patch)
     },
+    "xoftr": {
+        "output": "matches-xoftr",
+        "model": {
+            "name": "xoftr",
+            "weights": "weights_xoftr_640.ckpt",
+            "max_keypoints": 2000,
+            "match_threshold": 0.3,
+        },
+        "preprocessing": {
+            "grayscale": True,
+            "resize_max": 1024,
+            "dfactor": 8,
+            "width": 640,
+            "height": 480,
+            "force_resize": True,
+        },
+        "max_error": 1,  # max error for assigned keypoints (in px)
+        "cell_size": 1,  # size of quantization patch (max 1 kp/patch)
+    },
     # "loftr_quadtree": {
     #     "output": "matches-loftr-quadtree",
     #     "model": {
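The new `"xoftr"` entry slots into the same `confs` table the other dense matchers use, so it can be picked up programmatically. A minimal sketch — only `match_dense.confs` is taken from this commit; the override value is illustrative:

```python
import copy

from hloc import match_dense

# Fetch the XoFTR preset added above; deep-copy so edits do not leak into
# the module-level defaults shared by other callers.
conf = copy.deepcopy(match_dense.confs["xoftr"])
conf["model"]["match_threshold"] = 0.2  # hypothetical, slightly denser matches
print(conf["output"])  # -> "matches-xoftr"
```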
hloc/match_features.py
CHANGED
@@ -347,8 +347,13 @@ def match_from_paths(
 
 
 def scale_keypoints(kpts, scale):
-    if …
-    …
+    if (
+        isinstance(scale, (list, tuple, np.ndarray))
+        and len(scale) == 2
+        and np.any(scale != np.array([1.0, 1.0]))
+    ):
+        kpts[:, 0] *= scale[0]  # scale x-dimension
+        kpts[:, 1] *= scale[1]  # scale y-dimension
     return kpts
 
 
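As a quick illustration of the new guard, here is how the helper behaves on a toy array (a sketch; the function scales in place and also returns the array, so copies are passed):

```python
import numpy as np

from hloc.match_features import scale_keypoints

kpts = np.array([[10.0, 20.0], [30.0, 40.0]])

# A non-unit (sx, sy) pair rescales x and y independently.
print(scale_keypoints(kpts.copy(), np.array([2.0, 0.5])))
# [[20. 10.]
#  [60. 20.]]

# A unit scale leaves the keypoints untouched.
print(scale_keypoints(kpts.copy(), np.array([1.0, 1.0])))
```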
hloc/matchers/aspanformer.py
CHANGED
@@ -1,4 +1,3 @@
-import subprocess
 import sys
 from pathlib import Path
 
hloc/matchers/dkm.py
CHANGED
@@ -1,7 +1,6 @@
 import sys
 from pathlib import Path
 
-import torch
 from PIL import Image
 
 from hloc import DEVICE, MODEL_REPO_ID, logger
hloc/matchers/gim.py
CHANGED
@@ -3,28 +3,30 @@ from pathlib import Path
 
 import torch
 
-from .. import MODEL_REPO_ID, logger
+from .. import DEVICE, MODEL_REPO_ID, logger
 from ..utils.base_model import BaseModel
 
 gim_path = Path(__file__).parent / "../../third_party/gim"
 sys.path.append(str(gim_path))
 
+
 def load_model(weight_name, checkpoints_path):
     # load model
     model = None
     detector = None
     if weight_name == "gim_dkm":
         from gim.dkm.models.model_zoo.DKMv3 import DKMv3
+
         model = DKMv3(weights=None, h=672, w=896)
     elif weight_name == "gim_loftr":
+        from gim.loftr.config import get_cfg_defaults
         from gim.loftr.loftr import LoFTR
         from gim.loftr.misc import lower_config
-        from gim.loftr.config import get_cfg_defaults
 
         model = LoFTR(lower_config(get_cfg_defaults())["loftr"])
     elif weight_name == "gim_lightglue":
-        from gim.lightglue.superpoint import SuperPoint
         from gim.lightglue.models.matchers.lightglue import LightGlue
+        from gim.lightglue.superpoint import SuperPoint
 
         detector = SuperPoint(
             {
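For context, `load_model` is the factory the GIM matcher uses to pick a backbone by weight name. A minimal sketch of calling it directly; the hunk above only shows the top of the function, so the assumption that it returns the `(model, detector)` pair it initializes, and the checkpoint filename, are illustrative:

```python
from hloc.matchers.gim import load_model

# "gim_loftr" builds a LoFTR backbone and no separate keypoint detector.
# The checkpoint path is a hypothetical local file; the (model, detector)
# return value is assumed from the variables initialized in the function.
model, detector = load_model("gim_loftr", "checkpoints/gim_loftr_50h.ckpt")
```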
hloc/matchers/imp.py
CHANGED
@@ -33,9 +33,7 @@ class IMP(BaseModel):
         self.conf = {**self.default_conf, **conf}
         model_path = self._download_model(
             repo_id=MODEL_REPO_ID,
-            filename="{}/{}".format(
-                'pram', self.conf["model_name"]
-            ),
+            filename="{}/{}".format("pram", self.conf["model_name"]),
         )
 
         # self.net = nets.gml(self.conf).eval().to(DEVICE)
hloc/matchers/mickey.py
CHANGED
@@ -1,8 +1,6 @@
 import sys
 from pathlib import Path
 
-import torch
-
 from .. import MODEL_REPO_ID, logger
 from ..utils.base_model import BaseModel
 
hloc/matchers/omniglue.py
CHANGED
@@ -1,4 +1,3 @@
-import subprocess
 import sys
 from pathlib import Path
 
hloc/matchers/xoftr.py
ADDED
@@ -0,0 +1,93 @@
import sys
import warnings
from pathlib import Path

import torch

from hloc import DEVICE, MODEL_REPO_ID

tp_path = Path(__file__).parent / "../../third_party"
sys.path.append(str(tp_path))

from XoFTR.src.config.default import get_cfg_defaults
from XoFTR.src.utils.misc import lower_config
from XoFTR.src.xoftr import XoFTR as XoFTR_

from hloc import logger

from ..utils.base_model import BaseModel


class XoFTR(BaseModel):
    default_conf = {
        "model_name": "weights_xoftr_640.ckpt",
        "match_threshold": 0.3,
        "max_keypoints": -1,
    }
    required_inputs = ["image0", "image1"]

    def _init(self, conf):
        # Get default configurations
        config_ = get_cfg_defaults(inference=True)
        config_ = lower_config(config_)

        # Coarse level threshold
        config_["xoftr"]["match_coarse"]["thr"] = self.conf["match_threshold"]

        # Fine level threshold
        config_["xoftr"]["fine"]["thr"] = 0.1  # Default 0.1

        # It is possible to get denser matches
        # If True, xoftr returns all fine-level matches for each fine-level window (at 1/2 resolution)
        config_["xoftr"]["fine"]["denser"] = False  # Default False

        # XoFTR model
        matcher = XoFTR_(config=config_["xoftr"])

        model_path = self._download_model(
            repo_id=MODEL_REPO_ID,
            filename="{}/{}".format(
                Path(__file__).stem, self.conf["model_name"]
            ),
        )

        # Load model
        state_dict = torch.load(model_path, map_location="cpu")["state_dict"]
        matcher.load_state_dict(state_dict, strict=True)
        matcher = matcher.eval().to(DEVICE)
        self.net = matcher
        logger.info(f"Loaded XoFTR with weights {conf['model_name']}")

    def _forward(self, data):
        # For consistency with hloc pairs, we refine kpts in image0!
        rename = {
            "keypoints0": "keypoints1",
            "keypoints1": "keypoints0",
            "image0": "image1",
            "image1": "image0",
            "mask0": "mask1",
            "mask1": "mask0",
        }
        data_ = {rename[k]: v for k, v in data.items()}
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            pred = self.net(data_)
        pred = {
            "keypoints0": data_["mkpts0_f"],
            "keypoints1": data_["mkpts1_f"],
        }
        scores = data_["mconf_f"]

        top_k = self.conf["max_keypoints"]
        if top_k is not None and len(scores) > top_k:
            keep = torch.argsort(scores, descending=True)[:top_k]
            pred["keypoints0"], pred["keypoints1"] = (
                pred["keypoints0"][keep],
                pred["keypoints1"][keep],
            )
            scores = scores[keep]

        # Switch back indices
        pred = {(rename[k] if k in rename else k): v for k, v in pred.items()}
        pred["scores"] = scores
        return pred
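For orientation, this is roughly how the wrapper is driven. A minimal sketch, assuming hloc's `BaseModel` merges the conf dict in its constructor and dispatches calls to `_forward`, and that the inputs are grayscale tensors shaped `(1, 1, H, W)` with sides divisible by 8, matching the preprocessing entry in `hloc/match_dense.py`; the image sizes below are illustrative:

```python
import torch

from hloc import DEVICE
from hloc.matchers.xoftr import XoFTR

# Hypothetical pre-processed grayscale pair, values in [0, 1].
image0 = torch.rand(1, 1, 480, 640, device=DEVICE)
image1 = torch.rand(1, 1, 480, 640, device=DEVICE)

matcher = XoFTR({"match_threshold": 0.3, "max_keypoints": 2000})
with torch.no_grad():
    pred = matcher({"image0": image0, "image1": image1})

# Matched coordinates (in image0/image1 pixels) and their confidences,
# already truncated to the top max_keypoints by score.
print(pred["keypoints0"].shape, pred["keypoints1"].shape, pred["scores"].shape)
```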
third_party/XoFTR/LICENSE
ADDED
@@ -0,0 +1,202 @@
The standard Apache License, Version 2.0, January 2004 (http://www.apache.org/licenses/) — the full 202-line text: the definitions, copyright and patent grants, redistribution, contribution, trademark, warranty-disclaimer, and liability terms (Sections 1–9), followed by the appendix describing how to apply the license to a work.
third_party/XoFTR/README.md
ADDED
@@ -0,0 +1,115 @@

# XoFTR: Cross-modal Feature Matching Transformer
### [Paper (arXiv)](https://arxiv.org/pdf/2404.09692) | [Paper (CVF)](https://openaccess.thecvf.com/content/CVPR2024W/IMW/papers/Tuzcuoglu_XoFTR_Cross-modal_Feature_Matching_Transformer_CVPRW_2024_paper.pdf)
<br/>

This is the PyTorch implementation of the XoFTR: Cross-modal Feature Matching Transformer [CVPR 2024 Image Matching Workshop](https://image-matching-workshop.github.io/) paper.

XoFTR is a cross-modal cross-view method for local feature matching between thermal infrared (TIR) and visible images.

<!-- ![teaser](assets/figures/teaser.png) -->
<p align="center">
<img src="assets/figures/teaser.png" alt="teaser" width="500"/>
</p>

## Colab demo
To run XoFTR with custom image pairs without configuring your own GPU environment, you can use the Colab demo:
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1T495vybejujZjJlPY-sHm8YwV5Ss86AM?usp=sharing)

## Installation
```shell
conda env create -f environment.yaml
conda activate xoftr
```
Download links for
- [Pretrained model weights](https://drive.google.com/drive/folders/1RAI243OHuyZ4Weo1NiTy280bCE_82s4q?usp=drive_link): two versions available, trained at 640 and 840 resolutions.
- [METU-VisTIR dataset](https://drive.google.com/file/d/1Sj_vxj-GXvDQIMSg-ZUJR0vHBLIeDrLg/view?usp=sharing)

## METU-VisTIR Dataset
<!-- ![dataset](assets/figures/dataset.png) -->

<p align="center">
<img src="assets/figures/dataset.png" alt="dataset" width="600"/>
</p>

This dataset includes thermal and visible images captured across six diverse scenes with ground-truth camera poses. Four of the scenes encompass images captured under both cloudy and sunny conditions, while the remaining two scenes exclusively feature cloudy conditions. Since the cameras are auto-focus, there may be slight imperfections in the ground-truth camera parameters. For more information about the dataset, please refer to our [paper](https://arxiv.org/pdf/2404.09692).

**License of the dataset:**

The METU-VisTIR dataset is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.en).
### Data format
The dataset is organized into folders according to scenarios. The organization format is as follows:
```
METU-VisTIR/
├── index/
│   ├── scene_info_test/
│   │   ├── cloudy_cloudy_scene_1.npz    # scene info with test pairs
│   │   └── ...
│   ├── scene_info_val/
│   │   ├── cloudy_cloudy_scene_1.npz    # scene info with val pairs
│   │   └── ...
│   └── val_test_list/
│       ├── test_list.txt                # test scenes list
│       └── val_list.txt                 # val scenes list
├── cloudy/                              # cloudy scenes
│   ├── scene_1/
│   │   ├── thermal/
│   │   │   └── images/                  # thermal images
│   │   └── visible/
│   │       └── images/                  # visible images
│   └── ...
└── sunny/                               # sunny scenes
    └── ...
```

cloudy_cloudy_scene_\*.npz and cloudy_sunny_scene_\*.npz files contain GT camera poses and image pairs.

## Running XoFTR
### Demo to match image pairs with XoFTR

A <span style="color:red">demo notebook</span> for XoFTR on a single pair of images is given in [notebooks/xoftr_demo.ipynb](notebooks/xoftr_demo.ipynb).


### Reproduce the testing results for relative pose estimation
You need to download the METU-VisTIR dataset. After downloading, unzip the required files. Then, symlinks need to be created for the `data` folder.
```shell
unzip downloaded-file.zip

# set up symlinks
ln -s /path/to/METU_VisTIR/ /path/to/XoFTR/data/
```

```shell
conda activate xoftr

python test_relative_pose.py xoftr --ckpt weights/weights_xoftr_640.ckpt

# with visualization
python test_relative_pose.py xoftr --ckpt weights/weights_xoftr_640.ckpt --save_figs
```

The results and figures are saved to `results_relative_pose/`.

<br/>

## Training
See [Training XoFTR](./docs/TRAINING.md) for more details.

## Citation

If you find this code useful for your research, please use the following BibTeX entry.

```bibtex
@inproceedings{tuzcuouglu2024xoftr,
  title={XoFTR: Cross-modal Feature Matching Transformer},
  author={Tuzcuo{\u{g}}lu, {\"O}nder and K{\"o}ksal, Aybora and Sofu, Bu{\u{g}}ra and Kalkan, Sinan and Alatan, A Aydin},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={4275--4286},
  year={2024}
}
```
## Acknowledgement
This code is derived from [LoFTR](https://github.com/zju3dv/LoFTR). We are grateful to the authors for their contribution of the source code.
third_party/XoFTR/configs/data/__init__.py
ADDED
File without changes
third_party/XoFTR/configs/data/base.py
ADDED
@@ -0,0 +1,35 @@
"""
The data config will be the last one merged into the main config.
Settings in data configs will override all existing setups!
"""

from yacs.config import CfgNode as CN
_CN = CN()
_CN.DATASET = CN()
_CN.TRAINER = CN()

# training data config
_CN.DATASET.TRAIN_DATA_ROOT = None
_CN.DATASET.TRAIN_POSE_ROOT = None
_CN.DATASET.TRAIN_NPZ_ROOT = None
_CN.DATASET.TRAIN_LIST_PATH = None
_CN.DATASET.TRAIN_INTRINSIC_PATH = None
# validation set config
_CN.DATASET.VAL_DATA_ROOT = None
_CN.DATASET.VAL_POSE_ROOT = None
_CN.DATASET.VAL_NPZ_ROOT = None
_CN.DATASET.VAL_LIST_PATH = None
_CN.DATASET.VAL_INTRINSIC_PATH = None

# testing data config
_CN.DATASET.TEST_DATA_ROOT = None
_CN.DATASET.TEST_POSE_ROOT = None
_CN.DATASET.TEST_NPZ_ROOT = None
_CN.DATASET.TEST_LIST_PATH = None
_CN.DATASET.TEST_INTRINSIC_PATH = None

# dataset config
_CN.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.4
_CN.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0  # for both test and val

cfg = _CN
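The docstring above is the whole contract: whichever config is merged last wins, which is why the data configs derive from this `cfg` and are merged after the main config in the training scripts. A small self-contained sketch of that override behaviour with yacs (the values echo this file and `megadepth_vistir_trainval_640.py`):

```python
from yacs.config import CfgNode as CN

base = CN()
base.DATASET = CN()
base.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.4      # default in configs/data/base.py

data_cfg = CN()
data_cfg.DATASET = CN()
data_cfg.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.0  # as in megadepth_vistir_trainval_640.py

base.merge_from_other_cfg(data_cfg)             # data config merged last -> it wins
print(base.DATASET.MIN_OVERLAP_SCORE_TRAIN)     # 0.0
```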
third_party/XoFTR/configs/data/megadepth_trainval_840.py
ADDED
@@ -0,0 +1,22 @@
from configs.data.base import cfg


TRAIN_BASE_PATH = "data/megadepth/index"
cfg.DATASET.TRAINVAL_DATA_SOURCE = "MegaDepth"
cfg.DATASET.TRAIN_DATA_ROOT = "data/megadepth/train"
cfg.DATASET.TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_info_0.1_0.7"
cfg.DATASET.TRAIN_LIST_PATH = f"{TRAIN_BASE_PATH}/trainvaltest_list/train_list.txt"
cfg.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.0

TEST_BASE_PATH = "data/megadepth/index"
cfg.DATASET.TEST_DATA_SOURCE = "MegaDepth"
cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/megadepth/test"
cfg.DATASET.VAL_NPZ_ROOT = cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}/scene_info_val_1500"
cfg.DATASET.VAL_LIST_PATH = cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/trainvaltest_list/val_list.txt"
cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0  # for both test and val

# 368 scenes in total for MegaDepth
# (with difficulty balanced (further split each scene to 3 sub-scenes))
cfg.TRAINER.N_SAMPLES_PER_SUBSET = 100

cfg.DATASET.MGDPT_IMG_RESIZE = 840  # for training on 32GB mem GPUs
third_party/XoFTR/configs/data/megadepth_vistir_trainval_640.py
ADDED
@@ -0,0 +1,23 @@
from configs.data.base import cfg


TRAIN_BASE_PATH = "data/megadepth/index"
cfg.DATASET.TRAIN_DATA_SOURCE = "MegaDepth"
cfg.DATASET.TRAIN_DATA_ROOT = "data/megadepth/train"
cfg.DATASET.TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_info_0.1_0.7"
cfg.DATASET.TRAIN_LIST_PATH = f"{TRAIN_BASE_PATH}/trainvaltest_list/train_list.txt"
cfg.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.0

VAL_BASE_PATH = "data/METU_VisTIR/index"
cfg.DATASET.TEST_DATA_SOURCE = "MegaDepth"
cfg.DATASET.VAL_DATA_SOURCE = "VisTir"
cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/METU_VisTIR"
cfg.DATASET.VAL_NPZ_ROOT = cfg.DATASET.TEST_NPZ_ROOT = f"{VAL_BASE_PATH}/scene_info_val"
cfg.DATASET.VAL_LIST_PATH = cfg.DATASET.TEST_LIST_PATH = f"{VAL_BASE_PATH}/val_test_list/val_list.txt"
cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0  # for both test and val

# 368 scenes in total for MegaDepth
# (with difficulty balanced (further split each scene to 3 sub-scenes))
cfg.TRAINER.N_SAMPLES_PER_SUBSET = 100

cfg.DATASET.MGDPT_IMG_RESIZE = 640  # for training on 11GB mem GPUs
third_party/XoFTR/configs/data/pretrain.py
ADDED
@@ -0,0 +1,8 @@
from configs.data.base import cfg

cfg.DATASET.TRAIN_DATA_SOURCE = "KAIST"
cfg.DATASET.TRAIN_DATA_ROOT = "data/kaist-cvpr15"
cfg.DATASET.VAL_DATA_SOURCE = "KAIST"
cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/kaist-cvpr15"

cfg.DATASET.PRETRAIN_IMG_RESIZE = 640
third_party/XoFTR/configs/xoftr/outdoor/visible_thermal.py
ADDED
@@ -0,0 +1,17 @@
from src.config.default import _CN as cfg

cfg.XOFTR.MATCH_COARSE.MATCH_TYPE = 'dual_softmax'

cfg.TRAINER.CANONICAL_LR = 8e-3
cfg.TRAINER.WARMUP_STEP = 1875  # 3 epochs
cfg.TRAINER.WARMUP_RATIO = 0.1
cfg.TRAINER.MSLR_MILESTONES = [8, 12, 16, 20, 24, 30, 36, 42]

# pose estimation
cfg.TRAINER.RANSAC_PIXEL_THR = 1.5

cfg.TRAINER.OPTIMIZER = "adamw"
cfg.TRAINER.ADAMW_DECAY = 0.1
cfg.XOFTR.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3

cfg.TRAINER.USE_WANDB = True  # use Weights & Biases
third_party/XoFTR/configs/xoftr/pretrain/pretrain.py
ADDED
@@ -0,0 +1,12 @@
from src.config.default import _CN as cfg

cfg.TRAINER.CANONICAL_LR = 4e-3
cfg.TRAINER.WARMUP_STEP = 1250  # 2 epochs
cfg.TRAINER.WARMUP_RATIO = 0.1
cfg.TRAINER.MSLR_MILESTONES = [4, 6, 8, 10, 12, 14, 16, 18]

cfg.TRAINER.OPTIMIZER = "adamw"
cfg.TRAINER.ADAMW_DECAY = 0.1

cfg.TRAINER.USE_WANDB = True  # use Weights & Biases
third_party/XoFTR/data/megadepth/index/.gitignore
ADDED
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
third_party/XoFTR/data/megadepth/test/.gitignore
ADDED
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
third_party/XoFTR/data/megadepth/train/.gitignore
ADDED
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
third_party/XoFTR/docs/TRAINING.md
ADDED
@@ -0,0 +1,63 @@

# Training XoFTR

## Dataset setup
Generally, two parts of data are needed for training XoFTR: the original datasets, i.e., MegaDepth and the KAIST Multispectral Pedestrian Detection Benchmark dataset. For MegaDepth, the offline-generated dataset indices are also required. The dataset indices store scenes, image pairs, and other metadata within the dataset used for training. For the MegaDepth dataset, the relative poses between images used for training are directly cached in the indexing files.

### Download datasets
#### MegaDepth
In the fine-tuning stage, we use depth maps, undistorted images, and the corresponding camera intrinsics and extrinsics provided in the [original MegaDepth dataset](https://www.cs.cornell.edu/projects/megadepth/).
- Please download [MegaDepth undistorted images and processed depths](https://www.cs.cornell.edu/projects/megadepth/dataset/Megadepth_v1/MegaDepth_v1.tar.gz)
- The path of the downloaded data will be referred to as `/path/to/megadepth`


#### KAIST Multispectral Pedestrian Detection Benchmark dataset
In the pre-training stage, we use LWIR and visible image pairs from the [KAIST Multispectral Pedestrian Detection Benchmark](https://soonminhwang.github.io/rgbt-ped-detection/).

- Please set up the KAIST Multispectral Pedestrian Detection Benchmark dataset following [the official guide](https://github.com/SoonminHwang/rgbt-ped-detection) or from the [OneDrive link](https://onedrive.live.com/download?cid=1570430EADF56512&resid=1570430EADF56512%21109419&authkey=AJcMP-7Yp86PWoE)
- At the end, you should have the folder `kaist-cvpr15`, referred to as `/path/to/kaist-cvpr15`

### Download the dataset indices

You can download the required dataset indices from the [following link](https://drive.google.com/drive/folders/1DOcOPZb3-5cWxLqn256AhwUVjBPifhuf).
After downloading, unzip the required files.
```shell
unzip downloaded-file.zip

# extract dataset indices
tar xf train-data/megadepth_indices.tar
```

### Build the dataset symlinks

We symlink the datasets to the `data` directory under the main XoFTR project directory.

```shell
# MegaDepth
# -- # fine-tuning dataset
ln -sv /path/to/megadepth/phoenix /path/to/XoFTR/data/megadepth/train
# -- # dataset indices
ln -s /path/to/megadepth_indices/* /path/to/XoFTR/data/megadepth/index

# KAIST Multispectral Pedestrian Detection Benchmark dataset
# -- # pre-training dataset
ln -sv /path/to/kaist-cvpr15 /path/to/XoFTR/data
```


## Training
We provide pre-training and fine-tuning scripts for the datasets. The results in the XoFTR paper can be reproduced with 2 RTX A5000 (24 GB) GPUs for pre-training and 8 A100 GPUs for fine-tuning. For a different setup, we scale the learning rate and its warm-up linearly, but the final evaluation results might vary due to the different batch size & learning rate used. Thus the reproduction of the results in our paper is not guaranteed.


### Pre-training
``` shell
scripts/reproduce_train/pretrain.sh
```
> NOTE: Originally, we used 2 GPUs with a batch size of 2. You can change the number of GPUs and the batch size in the script as per your needs.

### Fine-tuning on MegaDepth
In the script, the path for the pre-trained weights is `pretrain_weights/epoch=8-.ckpt`. We used the weights of the 9th epoch from the pre-training stage (epoch numbers start from 0). You can change this ckpt path accordingly.
``` shell
scripts/reproduce_train/visible_thermal.sh
```
> NOTE: Originally, we used 8 GPUs with a batch size of 2. You can change the number of GPUs and the batch size in the script as per your needs.
third_party/XoFTR/environment.yaml
ADDED
@@ -0,0 +1,14 @@
name: xoftr
channels:
  # - https://dx-mirrors.sensetime.com/anaconda/cloud/pytorch
  - pytorch
  - nvidia
  - conda-forge
  - defaults
dependencies:
  - python=3.8
  - pytorch=2.0.1
  - pytorch-cuda=11.8
  - pip
  - pip:
    - -r requirements.txt
third_party/XoFTR/notebooks/xoftr_demo.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
third_party/XoFTR/notebooks/xoftr_demo_batch.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
third_party/XoFTR/pretrain.py
ADDED
@@ -0,0 +1,125 @@
import math
import argparse
import pprint
from distutils.util import strtobool
from pathlib import Path
from loguru import logger as loguru_logger
from datetime import datetime

import pytorch_lightning as pl
from pytorch_lightning.utilities import rank_zero_only
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
from pytorch_lightning.plugins import DDPPlugin

from src.config.default import get_cfg_defaults
from src.utils.misc import get_rank_zero_only_logger, setup_gpus
from src.utils.profiler import build_profiler
from src.lightning.data_pretrain import PretrainDataModule
from src.lightning.lightning_xoftr_pretrain import PL_XoFTR_Pretrain

loguru_logger = get_rank_zero_only_logger(loguru_logger)


def parse_args():
    # init a custom parser which will be added into pl.Trainer parser
    # check documentation: https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#trainer-flags
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'data_cfg_path', type=str, help='data config path')
    parser.add_argument(
        'main_cfg_path', type=str, help='main config path')
    parser.add_argument(
        '--exp_name', type=str, default='default_exp_name')
    parser.add_argument(
        '--batch_size', type=int, default=4, help='batch_size per gpu')
    parser.add_argument(
        '--num_workers', type=int, default=4)
    parser.add_argument(
        '--pin_memory', type=lambda x: bool(strtobool(x)),
        nargs='?', default=True, help='whether loading data to pinned memory or not')
    parser.add_argument(
        '--ckpt_path', type=str, default=None,
        help='pretrained checkpoint path')
    parser.add_argument(
        '--disable_ckpt', action='store_true',
        help='disable checkpoint saving (useful for debugging).')
    parser.add_argument(
        '--profiler_name', type=str, default=None,
        help='options: [inference, pytorch], or leave it unset')
    parser.add_argument(
        '--parallel_load_data', action='store_true',
        help='load datasets in with multiple processes.')

    parser = pl.Trainer.add_argparse_args(parser)
    return parser.parse_args()


def main():
    # parse arguments
    args = parse_args()
    rank_zero_only(pprint.pprint)(vars(args))

    # init default-cfg and merge it with the main- and data-cfg
    config = get_cfg_defaults()
    config.merge_from_file(args.main_cfg_path)
    config.merge_from_file(args.data_cfg_path)
    pl.seed_everything(config.TRAINER.SEED)  # reproducibility

    # scale lr and warmup-step automatically
    args.gpus = _n_gpus = setup_gpus(args.gpus)
    config.TRAINER.WORLD_SIZE = _n_gpus * args.num_nodes
    config.TRAINER.TRUE_BATCH_SIZE = config.TRAINER.WORLD_SIZE * args.batch_size
    _scaling = config.TRAINER.TRUE_BATCH_SIZE / config.TRAINER.CANONICAL_BS
    config.TRAINER.SCALING = _scaling
    config.TRAINER.TRUE_LR = config.TRAINER.CANONICAL_LR * _scaling
    config.TRAINER.WARMUP_STEP = math.floor(config.TRAINER.WARMUP_STEP / _scaling)

    # lightning module
    profiler = build_profiler(args.profiler_name)
    model = PL_XoFTR_Pretrain(config, pretrained_ckpt=args.ckpt_path, profiler=profiler)
    loguru_logger.info(f"XoFTR LightningModule initialized!")

    # lightning data
    data_module = PretrainDataModule(args, config)
    loguru_logger.info(f"XoFTR DataModule initialized!")

    # TensorBoard Logger
    logger = [TensorBoardLogger(save_dir='logs/tb_logs', name=args.exp_name, default_hp_metric=False)]
    ckpt_dir = Path(logger[0].log_dir) / 'checkpoints'
    if config.TRAINER.USE_WANDB:
        logger.append(WandbLogger(name=args.exp_name + f"_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}",
                                  project='XoFTR'))

    # Callbacks
    # TODO: update ModelCheckpoint to monitor multiple metrics
    ckpt_callback = ModelCheckpoint(verbose=True, save_top_k=-1,
                                    save_last=True,
                                    dirpath=str(ckpt_dir),
                                    filename='{epoch}')
    lr_monitor = LearningRateMonitor(logging_interval='step')
    callbacks = [lr_monitor]
    if not args.disable_ckpt:
        callbacks.append(ckpt_callback)

    # Lightning Trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
        plugins=DDPPlugin(find_unused_parameters=True,
                          num_nodes=args.num_nodes,
                          sync_batchnorm=config.TRAINER.WORLD_SIZE > 0),
        gradient_clip_val=config.TRAINER.GRADIENT_CLIPPING,
        callbacks=callbacks,
        logger=logger,
        sync_batchnorm=config.TRAINER.WORLD_SIZE > 0,
        replace_sampler_ddp=False,  # use custom sampler
        reload_dataloaders_every_epoch=False,  # avoid repeated samples!
        weights_summary='full',
        profiler=profiler)
    loguru_logger.info(f"Trainer initialized!")
    loguru_logger.info(f"Start training!")
    trainer.fit(model, datamodule=data_module)


if __name__ == '__main__':
    main()
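To make the automatic scaling above concrete: with the pre-training defaults from this commit (CANONICAL_BS = 64 in `src/config/default.py`, CANONICAL_LR = 4e-3 and WARMUP_STEP = 1250 in `configs/xoftr/pretrain/pretrain.py`) and the 2-GPU, batch-size-2 setup from `scripts/reproduce_train/pretrain.sh`, the effective values work out as below. This is a worked example, not output from the script:

```python
canonical_bs = 64          # _CN.TRAINER.CANONICAL_BS
canonical_lr = 4e-3        # cfg.TRAINER.CANONICAL_LR (pretrain config)
warmup_step = 1250         # cfg.TRAINER.WARMUP_STEP (~2 epochs)

world_size = 2 * 1         # 2 GPUs per node x 1 node
true_batch_size = world_size * 2            # batch_size=2 per GPU -> 4
scaling = true_batch_size / canonical_bs    # 4 / 64 = 0.0625

true_lr = canonical_lr * scaling            # 2.5e-4
scaled_warmup = int(warmup_step / scaling)  # 20000 steps
print(true_lr, scaled_warmup)
```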
third_party/XoFTR/requirements.txt
ADDED
@@ -0,0 +1,19 @@
numpy==1.23.1
opencv_python==4.5.1.48
albumentations==0.5.1 --no-binary=imgaug,albumentations
ray>=1.0.1
einops==0.3.0
kornia==0.4.1
loguru==0.5.3
yacs>=0.1.8
tqdm==4.65.0
autopep8
pylint
ipython
jupyterlab
matplotlib
h5py==3.1.0
pytorch-lightning==1.3.5
torchmetrics==0.6.0  # version problem: https://github.com/NVIDIA/DeepLearningExamples/issues/1113#issuecomment-1102969461
joblib>=1.0.1
wandb
third_party/XoFTR/scripts/reproduce_train/pretrain.sh
ADDED
@@ -0,0 +1,31 @@
#!/bin/bash -l

SCRIPTPATH=$(dirname $(readlink -f "$0"))
PROJECT_DIR="${SCRIPTPATH}/../../"

# conda activate loftr
export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH
cd $PROJECT_DIR

data_cfg_path="configs/data/pretrain.py"
main_cfg_path="configs/xoftr/pretrain/pretrain.py"

n_nodes=1
n_gpus_per_node=2
torch_num_workers=16
batch_size=2
pin_memory=true
exp_name="pretrain-${TRAIN_IMG_SIZE}-bs=$(($n_gpus_per_node * $n_nodes * $batch_size))"

python -u ./pretrain.py \
    ${data_cfg_path} \
    ${main_cfg_path} \
    --exp_name=${exp_name} \
    --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \
    --batch_size=${batch_size} --num_workers=${torch_num_workers} --pin_memory=${pin_memory} \
    --check_val_every_n_epoch=1 \
    --log_every_n_steps=100 \
    --limit_val_batches=1. \
    --num_sanity_val_steps=10 \
    --benchmark=True \
    --max_epochs=15
third_party/XoFTR/scripts/reproduce_train/visible_thermal.sh
ADDED
@@ -0,0 +1,35 @@
#!/bin/bash -l

SCRIPTPATH=$(dirname $(readlink -f "$0"))
PROJECT_DIR="${SCRIPTPATH}/../../"

# conda activate xoftr
export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH
cd $PROJECT_DIR

TRAIN_IMG_SIZE=640
# TRAIN_IMG_SIZE=840
data_cfg_path="configs/data/megadepth_vistir_trainval_${TRAIN_IMG_SIZE}.py"
main_cfg_path="configs/xoftr/outdoor/visible_thermal.py"

n_nodes=1
n_gpus_per_node=8
torch_num_workers=16
batch_size=2
pin_memory=true
exp_name="visible_thermal-${TRAIN_IMG_SIZE}-bs=$(($n_gpus_per_node * $n_nodes * $batch_size))"
ckpt_path="pretrain_weights/epoch=8-.ckpt"

python -u ./train.py \
    ${data_cfg_path} \
    ${main_cfg_path} \
    --exp_name=${exp_name} \
    --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \
    --batch_size=${batch_size} --num_workers=${torch_num_workers} --pin_memory=${pin_memory} \
    --check_val_every_n_epoch=1 \
    --log_every_n_steps=100 \
    --limit_val_batches=1. \
    --num_sanity_val_steps=10 \
    --benchmark=True \
    --max_epochs=30 \
    --ckpt_path=${ckpt_path}
third_party/XoFTR/src/__init__.py
ADDED
File without changes
third_party/XoFTR/src/config/default.py
ADDED
@@ -0,0 +1,203 @@
from yacs.config import CfgNode as CN

INFERENCE = False

_CN = CN()

##############  ↓  XoFTR Pipeline  ↓  ##############
_CN.XOFTR = CN()
_CN.XOFTR.RESOLUTION = (8, 2)  # options: [(8, 2)]
_CN.XOFTR.FINE_WINDOW_SIZE = 5  # window_size in fine_level, must be odd
_CN.XOFTR.MEDIUM_WINDOW_SIZE = 3  # window_size in fine_level, must be odd

# 1. XoFTR-backbone (local feature CNN) config
_CN.XOFTR.RESNET = CN()
_CN.XOFTR.RESNET.INITIAL_DIM = 128
_CN.XOFTR.RESNET.BLOCK_DIMS = [128, 196, 256]  # s1, s2, s3

# 2. XoFTR-coarse module config
_CN.XOFTR.COARSE = CN()
_CN.XOFTR.COARSE.INFERENCE = INFERENCE
_CN.XOFTR.COARSE.D_MODEL = 256
_CN.XOFTR.COARSE.D_FFN = 256
_CN.XOFTR.COARSE.NHEAD = 8
_CN.XOFTR.COARSE.LAYER_NAMES = ['self', 'cross'] * 4
_CN.XOFTR.COARSE.ATTENTION = 'linear'  # options: ['linear', 'full']

# 3. Coarse-Matching config
_CN.XOFTR.MATCH_COARSE = CN()
_CN.XOFTR.MATCH_COARSE.INFERENCE = INFERENCE
_CN.XOFTR.MATCH_COARSE.D_MODEL = 256
_CN.XOFTR.MATCH_COARSE.THR = 0.3
_CN.XOFTR.MATCH_COARSE.BORDER_RM = 2
_CN.XOFTR.MATCH_COARSE.MATCH_TYPE = 'dual_softmax'  # options: ['dual_softmax']
_CN.XOFTR.MATCH_COARSE.DSMAX_TEMPERATURE = 0.1
_CN.XOFTR.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.2  # training tricks: save GPU memory
_CN.XOFTR.MATCH_COARSE.TRAIN_PAD_NUM_GT_MIN = 200  # training tricks: avoid DDP deadlock

# 4. XoFTR-fine module config
_CN.XOFTR.FINE = CN()
_CN.XOFTR.FINE.DENSER = False  # if true, match all features in fine-level windows
_CN.XOFTR.FINE.INFERENCE = INFERENCE
_CN.XOFTR.FINE.DSMAX_TEMPERATURE = 0.1
_CN.XOFTR.FINE.THR = 0.1
_CN.XOFTR.FINE.MLP_HIDDEN_DIM_COEF = 2  # coef for mlp hidden dim (hidden_dim = feat_dim * coef)
_CN.XOFTR.FINE.NHEAD_FINE_LEVEL = 8
_CN.XOFTR.FINE.NHEAD_MEDIUM_LEVEL = 7


# 5. XoFTR Losses

_CN.XOFTR.LOSS = CN()
_CN.XOFTR.LOSS.FOCAL_ALPHA = 0.25
_CN.XOFTR.LOSS.FOCAL_GAMMA = 2.0
_CN.XOFTR.LOSS.POS_WEIGHT = 1.0
_CN.XOFTR.LOSS.NEG_WEIGHT = 1.0

# -- # coarse-level
_CN.XOFTR.LOSS.COARSE_WEIGHT = 0.5
# -- # fine-level
_CN.XOFTR.LOSS.FINE_WEIGHT = 0.3
# -- # sub-pixel
_CN.XOFTR.LOSS.SUB_WEIGHT = 1 * 10**4

##############  Dataset  ##############
_CN.DATASET = CN()
# 1. data config
# training and validating
_CN.DATASET.TRAIN_DATA_SOURCE = None  # options: ['ScanNet', 'MegaDepth']
_CN.DATASET.TRAIN_DATA_ROOT = None
_CN.DATASET.TRAIN_POSE_ROOT = None  # (optional directory for poses)
_CN.DATASET.TRAIN_NPZ_ROOT = None
_CN.DATASET.TRAIN_LIST_PATH = None
_CN.DATASET.TRAIN_INTRINSIC_PATH = None
_CN.DATASET.VAL_DATA_SOURCE = None
_CN.DATASET.VAL_DATA_ROOT = None
_CN.DATASET.VAL_POSE_ROOT = None  # (optional directory for poses)
_CN.DATASET.VAL_NPZ_ROOT = None
_CN.DATASET.VAL_LIST_PATH = None  # None if val data from all scenes are bundled into a single npz file
_CN.DATASET.VAL_INTRINSIC_PATH = None
# testing
_CN.DATASET.TEST_DATA_SOURCE = None
_CN.DATASET.TEST_DATA_ROOT = None
_CN.DATASET.TEST_POSE_ROOT = None  # (optional directory for poses)
_CN.DATASET.TEST_NPZ_ROOT = None
_CN.DATASET.TEST_LIST_PATH = None  # None if test data from all scenes are bundled into a single npz file
_CN.DATASET.TEST_INTRINSIC_PATH = None

# 2. dataset config
# general options
_CN.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.4  # discard data with overlap_score < min_overlap_score
_CN.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0
_CN.DATASET.AUGMENTATION_TYPE = "rgb_thermal"  # options: [None, 'dark', 'mobile']

# MegaDepth options
_CN.DATASET.MGDPT_IMG_RESIZE = 640  # resize the longer side, zero-pad bottom-right to square.
_CN.DATASET.MGDPT_IMG_PAD = True  # pad img to square with size = MGDPT_IMG_RESIZE
_CN.DATASET.MGDPT_DEPTH_PAD = True  # pad depthmap to square with size = 2000
_CN.DATASET.MGDPT_DF = 8

# VisTir options
_CN.DATASET.VISTIR_IMG_RESIZE = 640  # resize the longer side, zero-pad bottom-right to square.
_CN.DATASET.VISTIR_IMG_PAD = False  # pad img to square with size = VISTIR_IMG_RESIZE
_CN.DATASET.VISTIR_DF = 8

# Pretrain dataset options
_CN.DATASET.PRETRAIN_IMG_RESIZE = 640  # resize the longer side, zero-pad bottom-right to square.
_CN.DATASET.PRETRAIN_IMG_PAD = True  # pad img to square with size = PRETRAIN_IMG_RESIZE
_CN.DATASET.PRETRAIN_DF = 8
_CN.DATASET.PRETRAIN_FRAME_GAP = 2  # the gap between video frames of Kaist dataset

##############  Trainer  ##############
_CN.TRAINER = CN()
113 |
+
_CN.TRAINER.WORLD_SIZE = 1
|
114 |
+
_CN.TRAINER.CANONICAL_BS = 64
|
115 |
+
_CN.TRAINER.CANONICAL_LR = 6e-3
|
116 |
+
_CN.TRAINER.SCALING = None # this will be calculated automatically
|
117 |
+
_CN.TRAINER.FIND_LR = False # use learning rate finder from pytorch-lightning
|
118 |
+
|
119 |
+
_CN.TRAINER.USE_WANDB = False # use weight and biases
|
120 |
+
|
121 |
+
# optimizer
|
122 |
+
_CN.TRAINER.OPTIMIZER = "adamw" # [adam, adamw]
|
123 |
+
_CN.TRAINER.TRUE_LR = None # this will be calculated automatically at runtime
|
124 |
+
_CN.TRAINER.ADAM_DECAY = 0. # ADAM: for adam
|
125 |
+
_CN.TRAINER.ADAMW_DECAY = 0.1
|
126 |
+
|
127 |
+
# step-based warm-up
|
128 |
+
_CN.TRAINER.WARMUP_TYPE = 'linear' # [linear, constant]
|
129 |
+
_CN.TRAINER.WARMUP_RATIO = 0.
|
130 |
+
_CN.TRAINER.WARMUP_STEP = 4800
|
131 |
+
|
132 |
+
# learning rate scheduler
|
133 |
+
_CN.TRAINER.SCHEDULER = 'MultiStepLR' # [MultiStepLR, CosineAnnealing, ExponentialLR]
|
134 |
+
_CN.TRAINER.SCHEDULER_INTERVAL = 'epoch' # [epoch, step]
|
135 |
+
_CN.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12] # MSLR: MultiStepLR
|
136 |
+
_CN.TRAINER.MSLR_GAMMA = 0.5
|
137 |
+
_CN.TRAINER.COSA_TMAX = 30 # COSA: CosineAnnealing
|
138 |
+
_CN.TRAINER.ELR_GAMMA = 0.999992 # ELR: ExponentialLR, this value for 'step' interval
|
139 |
+
|
140 |
+
# plotting related
|
141 |
+
_CN.TRAINER.ENABLE_PLOTTING = True
|
142 |
+
_CN.TRAINER.N_VAL_PAIRS_TO_PLOT = 128 # number of val/test paris for plotting
|
143 |
+
_CN.TRAINER.PLOT_MODE = 'evaluation' # ['evaluation', 'confidence']
|
144 |
+
_CN.TRAINER.PLOT_MATCHES_ALPHA = 'dynamic'
|
145 |
+
|
146 |
+
# geometric metrics and pose solver
|
147 |
+
_CN.TRAINER.EPI_ERR_THR = 5e-4 # recommendation: 5e-4 for ScanNet, 1e-4 for MegaDepth (from SuperGlue)
|
148 |
+
_CN.TRAINER.POSE_GEO_MODEL = 'E' # ['E', 'F', 'H']
|
149 |
+
_CN.TRAINER.POSE_ESTIMATION_METHOD = 'RANSAC' # [RANSAC, DEGENSAC, MAGSAC]
|
150 |
+
_CN.TRAINER.RANSAC_PIXEL_THR = 0.5
|
151 |
+
_CN.TRAINER.RANSAC_CONF = 0.99999
|
152 |
+
_CN.TRAINER.RANSAC_MAX_ITERS = 10000
|
153 |
+
_CN.TRAINER.USE_MAGSACPP = False
|
154 |
+
|
155 |
+
# data sampler for train_dataloader
|
156 |
+
_CN.TRAINER.DATA_SAMPLER = 'scene_balance' # options: ['scene_balance', 'random', 'normal']
|
157 |
+
# 'scene_balance' config
|
158 |
+
_CN.TRAINER.N_SAMPLES_PER_SUBSET = 200
|
159 |
+
_CN.TRAINER.SB_SUBSET_SAMPLE_REPLACEMENT = True # whether sample each scene with replacement or not
|
160 |
+
_CN.TRAINER.SB_SUBSET_SHUFFLE = True # after sampling from scenes, whether shuffle within the epoch or not
|
161 |
+
_CN.TRAINER.SB_REPEAT = 1 # repeat N times for training the sampled data
|
162 |
+
# 'random' config
|
163 |
+
_CN.TRAINER.RDM_REPLACEMENT = True
|
164 |
+
_CN.TRAINER.RDM_NUM_SAMPLES = None
|
165 |
+
|
166 |
+
# gradient clipping
|
167 |
+
_CN.TRAINER.GRADIENT_CLIPPING = 0.5
|
168 |
+
|
169 |
+
# reproducibility
|
170 |
+
# This seed affects the data sampling. With the same seed, the data sampling is promised
|
171 |
+
# to be the same. When resume training from a checkpoint, it's better to use a different
|
172 |
+
# seed, otherwise the sampled data will be exactly the same as before resuming, which will
|
173 |
+
# cause less unique data items sampled during the entire training.
|
174 |
+
# Use of different seed values might affect the final training result, since not all data items
|
175 |
+
# are used during training on ScanNet. (60M pairs of images sampled during traing from 230M pairs in total.)
|
176 |
+
_CN.TRAINER.SEED = 66
|
177 |
+
|
178 |
+
############## Pretrain ##############
|
179 |
+
_CN.PRETRAIN = CN()
|
180 |
+
_CN.PRETRAIN.PATCH_SIZE = 64 # patch sıze for masks
|
181 |
+
_CN.PRETRAIN.MASK_RATIO = 0.5
|
182 |
+
_CN.PRETRAIN.MAE_MARGINS = [0, 0.4, 0, 0] # margins not to be masked (up bottom left right)
|
183 |
+
_CN.PRETRAIN.VAL_SEED = 42 # rng seed to crate the same masks for validation
|
184 |
+
|
185 |
+
_CN.XOFTR.PRETRAIN_PATCH_SIZE = _CN.PRETRAIN.PATCH_SIZE
|
186 |
+
|
187 |
+
############## Test/Inference ##############
|
188 |
+
_CN.TEST = CN()
|
189 |
+
_CN.TEST.IMG0_RESIZE = 640 # resize the longer side
|
190 |
+
_CN.TEST.IMG1_RESIZE = 640 # resize the longer side
|
191 |
+
_CN.TEST.DF = 8
|
192 |
+
_CN.TEST.PADDING = False # pad img to square with size = IMG0_RESIZE, IMG1_RESIZE
|
193 |
+
_CN.TEST.COARSE_SCALE = 0.125
|
194 |
+
|
195 |
+
def get_cfg_defaults(inference=False):
|
196 |
+
"""Get a yacs CfgNode object with default values for my_project."""
|
197 |
+
# Return a clone so that the defaults will not be altered
|
198 |
+
# This is for the "local variable" use pattern
|
199 |
+
if inference:
|
200 |
+
_CN.XOFTR.COARSE.INFERENCE = True
|
201 |
+
_CN.XOFTR.MATCH_COARSE.INFERENCE = True
|
202 |
+
_CN.XOFTR.FINE.INFERENCE = True
|
203 |
+
return _CN.clone()
|
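Not part of the commit: a minimal sketch of how a yacs config node like the one above is typically consumed. The override key below is only an illustration, not a value taken from this diff.

# Illustrative usage sketch (assumes the defaults above are importable as src.config.default).
from src.config.default import get_cfg_defaults

config = get_cfg_defaults(inference=True)                 # clone with the INFERENCE flags enabled
config.merge_from_list(["XOFTR.MATCH_COARSE.THR", 0.2])   # standard yacs override mechanism
print(config.XOFTR.RESOLUTION, config.XOFTR.MATCH_COARSE.THR)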
third_party/XoFTR/src/datasets/megadepth.py
ADDED
@@ -0,0 +1,143 @@
import os.path as osp
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from loguru import logger

from src.utils.dataset import read_megadepth_gray, read_megadepth_depth


def correct_image_paths(scene_info):
    """Change the path format of the undistorted images from D2Net to the MegaDepth_v1 format."""
    image_paths = scene_info["image_paths"]
    for ii in range(len(image_paths)):
        if image_paths[ii] is not None:
            folds = image_paths[ii].split("/")
            path = osp.join("phoenix/S6/zl548/MegaDepth_v1/", folds[1], "dense0/imgs", folds[3])
            image_paths[ii] = path
    scene_info["image_paths"] = image_paths
    return scene_info


class MegaDepthDataset(Dataset):
    def __init__(self,
                 root_dir,
                 npz_path,
                 mode='train',
                 min_overlap_score=0.4,
                 img_resize=None,
                 df=None,
                 img_padding=False,
                 depth_padding=False,
                 augment_fn=None,
                 **kwargs):
        """
        Manage one scene (npz_path) of the MegaDepth dataset.

        Args:
            root_dir (str): MegaDepth root directory that has `phoenix`.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            mode (str): options are ['train', 'val', 'test']
            min_overlap_score (float): how much a pair should have in common. In range of [0, 1]. Set to 0 when testing.
            img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
                This is useful during training with batches and testing with memory-intensive algorithms.
            df (int, optional): image size division factor. NOTE: this will change the final image size after img_resize.
            img_padding (bool): If set to 'True', zero-pad the image to squared size. This is useful during training.
            depth_padding (bool): If set to 'True', zero-pad depthmap to (2000, 2000). This is useful during training.
            augment_fn (callable, optional): augments images with pre-defined visual effects.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode
        self.scene_id = npz_path.split('.')[0]

        # prepare scene_info and pair_info
        if mode == 'test' and min_overlap_score != 0:
            logger.warning("You are using `min_overlap_score`!=0 in test mode. Set to 0.")
            min_overlap_score = 0
        self.scene_info = np.load(npz_path, allow_pickle=True)
        self.scene_info = correct_image_paths(self.scene_info)
        self.pair_infos = self.scene_info['pair_infos'].copy()
        del self.scene_info['pair_infos']
        self.pair_infos = [pair_info for pair_info in self.pair_infos if pair_info[1] > min_overlap_score]

        # parameters for image resizing, padding and depthmap padding
        if mode == 'train':
            assert img_resize is not None and img_padding and depth_padding
        self.img_resize = img_resize
        self.df = df
        self.img_padding = img_padding
        self.depth_max_size = 2000 if depth_padding else None  # the upper bound of depthmap size in MegaDepth.

        # for training XoFTR
        # self.augment_fn = augment_fn if mode == 'train' else None
        self.augment_fn = augment_fn
        self.coarse_scale = kwargs.get('coarse_scale', 0.125)  # kwargs is a plain dict

    def __len__(self):
        return len(self.pair_infos)

    def __getitem__(self, idx):
        (idx0, idx1), overlap_score, central_matches = self.pair_infos[idx]

        # read grayscale image and mask. (1, h, w) and (h, w)
        img_name0 = osp.join(self.root_dir, self.scene_info['image_paths'][idx0])
        img_name1 = osp.join(self.root_dir, self.scene_info['image_paths'][idx1])

        if getattr(self.augment_fn, 'random_switch', False):
            im_num = torch.randint(0, 2, (1,))
            augment_fn_0 = lambda x: self.augment_fn(x, image_num=im_num)
            augment_fn_1 = lambda x: self.augment_fn(x, image_num=1 - im_num)
        else:
            augment_fn_0 = self.augment_fn
            augment_fn_1 = self.augment_fn
        image0, mask0, scale0 = read_megadepth_gray(
            img_name0, self.img_resize, self.df, self.img_padding, augment_fn=augment_fn_0)
        image1, mask1, scale1 = read_megadepth_gray(
            img_name1, self.img_resize, self.df, self.img_padding, augment_fn=augment_fn_1)

        # read depth. shape: (h, w)
        if self.mode in ['train', 'val']:
            depth0 = read_megadepth_depth(
                osp.join(self.root_dir, self.scene_info['depth_paths'][idx0]), pad_to=self.depth_max_size)
            depth1 = read_megadepth_depth(
                osp.join(self.root_dir, self.scene_info['depth_paths'][idx1]), pad_to=self.depth_max_size)
        else:
            depth0 = depth1 = torch.tensor([])

        # read intrinsics of original size
        K_0 = torch.tensor(self.scene_info['intrinsics'][idx0].copy(), dtype=torch.float).reshape(3, 3)
        K_1 = torch.tensor(self.scene_info['intrinsics'][idx1].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T0 = self.scene_info['poses'][idx0]
        T1 = self.scene_info['poses'][idx1]
        T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[:4, :4]  # (4, 4)
        T_1to0 = T_0to1.inverse()

        data = {
            'image0': image0,  # (1, h, w)
            'depth0': depth0,  # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,  # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            'dataset_name': 'MegaDepth',
            'scene_id': self.scene_id,
            'pair_id': idx,
            'pair_names': (self.scene_info['image_paths'][idx0], self.scene_info['image_paths'][idx1]),
        }

        # for XoFTR training
        if mask0 is not None:  # img_padding is True
            if self.coarse_scale:
                [ts_mask_0, ts_mask_1] = F.interpolate(torch.stack([mask0, mask1], dim=0)[None].float(),
                                                       scale_factor=self.coarse_scale,
                                                       mode='nearest',
                                                       recompute_scale_factor=False)[0].bool()
                data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1})

        return data
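Not part of the commit: a rough usage sketch for the dataset above. The root directory and npz path below are placeholders chosen for illustration, not paths taken from this diff.

# Illustrative usage sketch; paths are placeholders.
from torch.utils.data import DataLoader
from src.datasets.megadepth import MegaDepthDataset

dataset = MegaDepthDataset(
    root_dir="data/megadepth/train",                  # directory containing phoenix/
    npz_path="data/megadepth/index/scene_xxxx.npz",   # placeholder per-scene index file
    mode="train", img_resize=640, df=8,
    img_padding=True, depth_padding=True)
loader = DataLoader(dataset, batch_size=2, num_workers=4)
batch = next(iter(loader))  # dict with image0/1, depth0/1, K0/1, T_0to1, masks, ...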
third_party/XoFTR/src/datasets/pretrain_dataset.py
ADDED
@@ -0,0 +1,156 @@
import os
import glob
import os.path as osp
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from loguru import logger
import random
from src.utils.dataset import read_pretrain_gray


class PretrainDataset(Dataset):
    def __init__(self,
                 root_dir,
                 mode='train',
                 img_resize=None,
                 df=None,
                 img_padding=False,
                 frame_gap=2,
                 **kwargs):
        """
        Manage image pairs of the KAIST Multispectral Pedestrian Detection Benchmark dataset.

        Args:
            root_dir (str): KAIST Multispectral Pedestrian root directory.
            mode (str): options are ['train', 'val']
            img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
                This is useful during training with batches and testing with memory-intensive algorithms.
            df (int, optional): image size division factor. NOTE: this will change the final image size after img_resize.
            img_padding (bool): If set to 'True', zero-pad the image to squared size. This is useful during training.
            frame_gap (int): the gap between consecutive video frames.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode

        # specify which part of the data is used for training and validation
        if mode == 'train':
            assert img_resize is not None and img_padding
            self.start_ratio = 0.0
            self.end_ratio = 0.9
        elif mode == 'val':
            assert img_resize is not None and img_padding
            self.start_ratio = 0.9
            self.end_ratio = 1.0
        else:
            raise NotImplementedError()

        # parameters for image resizing, padding
        self.img_resize = img_resize
        self.df = df
        self.img_padding = img_padding

        # for training XoFTR
        self.coarse_scale = kwargs.get('coarse_scale', 0.125)  # kwargs is a plain dict

        self.pair_paths = self.generate_kaist_pairs(root_dir, frame_gap=frame_gap, second_frame_range=0)

    def get_kaist_image_paths(self, root_dir):
        vis_img_paths = []
        lwir_img_paths = []
        img_num_per_folder = []

        # Recursively search for folders that contain "visible" and "lwir" image subfolders
        for folder, subfolders, filenames in os.walk(root_dir):
            if "visible" in subfolders and "lwir" in subfolders:
                vis_img_folder = osp.join(folder, "visible")
                lwir_img_folder = osp.join(folder, "lwir")
                # Use glob to find image files (add more extensions if needed)
                vis_imgs_i = glob.glob(osp.join(vis_img_folder, '*.jpg'))
                vis_imgs_i.sort()
                lwir_imgs_i = glob.glob(osp.join(lwir_img_folder, '*.jpg'))
                lwir_imgs_i.sort()
                vis_img_paths.append(vis_imgs_i)
                lwir_img_paths.append(lwir_imgs_i)
                img_num_per_folder.append(len(vis_imgs_i))
                assert len(vis_imgs_i) == len(lwir_imgs_i), f"Image numbers do not match in {folder}, {len(vis_imgs_i)} != {len(lwir_imgs_i)}"
        return vis_img_paths, lwir_img_paths, img_num_per_folder

    def generate_kaist_pairs(self, root_dir, frame_gap, second_frame_range):
        """ Generate image pairs (Vis-TIR) from the KAIST Pedestrian dataset.
        Args:
            root_dir: root directory for the dataset
            frame_gap (int): the frame gap between consecutive images
            second_frame_range (int): the range for the second image, i.e. for a first index i, the second index j is an element of [i - range, i + range]
        Returns:
            pair_paths (list)
        """
        vis_img_paths, lwir_img_paths, img_num_per_folder = self.get_kaist_image_paths(root_dir)
        pair_paths = []
        for i in range(len(img_num_per_folder)):
            num_img = img_num_per_folder[i]
            inds_vis = torch.arange(int(self.start_ratio * num_img),
                                    int(self.end_ratio * num_img),
                                    frame_gap, dtype=int)
            if second_frame_range > 0:
                inds_lwir = inds_vis + torch.randint(-second_frame_range, second_frame_range, (inds_vis.shape[0],))
                inds_lwir[inds_lwir < int(self.start_ratio * num_img)] = int(self.start_ratio * num_img)
                inds_lwir[inds_lwir > int(self.end_ratio * num_img) - 1] = int(self.end_ratio * num_img) - 1
            else:
                inds_lwir = inds_vis
            for j, k in zip(inds_vis, inds_lwir):
                img_name0 = os.path.relpath(vis_img_paths[i][j], root_dir)
                img_name1 = os.path.relpath(lwir_img_paths[i][k], root_dir)

                if torch.rand(1) > 0.5:
                    img_name0, img_name1 = img_name1, img_name0

                pair_paths.append([img_name0, img_name1])

        random.shuffle(pair_paths)
        return pair_paths

    def __len__(self):
        return len(self.pair_paths)

    def __getitem__(self, idx):
        # read grayscale and normalized image, and mask. (1, h, w) and (h, w)
        img_name0 = osp.join(self.root_dir, self.pair_paths[idx][0])
        img_name1 = osp.join(self.root_dir, self.pair_paths[idx][1])

        if self.mode == "train" and torch.rand(1) > 0.5:
            img_name0, img_name1 = img_name1, img_name0

        image0, image0_norm, mask0, scale0, image0_mean, image0_std = read_pretrain_gray(
            img_name0, self.img_resize, self.df, self.img_padding, None)
        image1, image1_norm, mask1, scale1, image1_mean, image1_std = read_pretrain_gray(
            img_name1, self.img_resize, self.df, self.img_padding, None)

        data = {
            'image0': image0,  # (1, h, w)
            'image1': image1,
            'image0_norm': image0_norm,
            'image1_norm': image1_norm,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            "image0_mean": image0_mean,
            "image0_std": image0_std,
            "image1_mean": image1_mean,
            "image1_std": image1_std,
            'dataset_name': 'PreTrain',
            'pair_id': idx,
            'pair_names': (self.pair_paths[idx][0], self.pair_paths[idx][1]),
        }

        # for XoFTR training
        if mask0 is not None:  # img_padding is True
            if self.coarse_scale:
                [ts_mask_0, ts_mask_1] = F.interpolate(torch.stack([mask0, mask1], dim=0)[None].float(),
                                                       scale_factor=self.coarse_scale,
                                                       mode='nearest',
                                                       recompute_scale_factor=False)[0].bool()
                data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1})

        return data
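Not part of the commit: a small sketch of how the split ratios and frame gap above translate into sampled indices. The frame count below is made up for illustration.

# Illustrative arithmetic only.
# For a folder with 1000 aligned visible/LWIR frames, mode='train' keeps indices in
# [0, 900) and mode='val' keeps [900, 1000); with frame_gap=2 every second visible
# frame is used, and since second_frame_range=0 the LWIR index equals the visible
# index, i.e. the pair is taken at the same time instant.
import torch
num_img, start_ratio, end_ratio, frame_gap = 1000, 0.0, 0.9, 2
inds_vis = torch.arange(int(start_ratio * num_img), int(end_ratio * num_img), frame_gap)
print(inds_vis[:5].tolist(), len(inds_vis))  # [0, 2, 4, 6, 8] 450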
third_party/XoFTR/src/datasets/sampler.py
ADDED
@@ -0,0 +1,77 @@
import torch
from torch.utils.data import Sampler, ConcatDataset


class RandomConcatSampler(Sampler):
    """ Random sampler for ConcatDataset. At each epoch, `n_samples_per_subset` samples will be drawn from each subset
    in the ConcatDataset. If `subset_replacement` is ``True``, sampling within each subset will be done with replacement.
    However, it is impossible to sample data without replacement between epochs, unless building a stateful sampler that lives along the entire training phase.

    For the current implementation, the randomness of sampling is ensured whether or not the sampler is recreated across epochs and whether or not `torch.manual_seed()` is called.
    Args:
        shuffle (bool): shuffle the randomly sampled indices across all sub-datasets.
        repeat (int): repeatedly use the sampled indices multiple times for training.
            [arXiv:1902.05509, arXiv:1901.09335]
    NOTE: Don't re-initialize the sampler between epochs (it will lead to repeated samples).
    NOTE: This sampler behaves differently from DistributedSampler.
          It assumes the dataset is split across ranks instead of replicated.
    TODO: Add a `set_epoch()` method to fulfill sampling without replacement across epochs.
          ref: https://github.com/PyTorchLightning/pytorch-lightning/blob/e9846dd758cfb1500eb9dba2d86f6912eb487587/pytorch_lightning/trainer/training_loop.py#L373
    """
    def __init__(self,
                 data_source: ConcatDataset,
                 n_samples_per_subset: int,
                 subset_replacement: bool = True,
                 shuffle: bool = True,
                 repeat: int = 1,
                 seed: int = None):
        if not isinstance(data_source, ConcatDataset):
            raise TypeError("data_source should be torch.utils.data.ConcatDataset")

        self.data_source = data_source
        self.n_subset = len(self.data_source.datasets)
        self.n_samples_per_subset = n_samples_per_subset
        self.n_samples = self.n_subset * self.n_samples_per_subset * repeat
        self.subset_replacement = subset_replacement
        self.repeat = repeat
        self.shuffle = shuffle
        self.generator = torch.manual_seed(seed)
        assert self.repeat >= 1

    def __len__(self):
        return self.n_samples

    def __iter__(self):
        indices = []
        # sample from each sub-dataset
        for d_idx in range(self.n_subset):
            low = 0 if d_idx == 0 else self.data_source.cumulative_sizes[d_idx - 1]
            high = self.data_source.cumulative_sizes[d_idx]
            if self.subset_replacement:
                rand_tensor = torch.randint(low, high, (self.n_samples_per_subset,),
                                            generator=self.generator, dtype=torch.int64)
            else:  # sample without replacement
                len_subset = len(self.data_source.datasets[d_idx])
                rand_tensor = torch.randperm(len_subset, generator=self.generator) + low
                if len_subset >= self.n_samples_per_subset:
                    rand_tensor = rand_tensor[:self.n_samples_per_subset]
                else:  # padding with replacement
                    rand_tensor_replacement = torch.randint(low, high, (self.n_samples_per_subset - len_subset,),
                                                            generator=self.generator, dtype=torch.int64)
                    rand_tensor = torch.cat([rand_tensor, rand_tensor_replacement])
            indices.append(rand_tensor)
        indices = torch.cat(indices)
        if self.shuffle:  # shuffle the sampled dataset (from multiple subsets)
            rand_tensor = torch.randperm(len(indices), generator=self.generator)
            indices = indices[rand_tensor]

        # repeat the sampled indices (can be used for RepeatAugmentation or pure RepeatSampling)
        if self.repeat > 1:
            repeat_indices = [indices.clone() for _ in range(self.repeat - 1)]
            if self.shuffle:
                _choice = lambda x: x[torch.randperm(len(x), generator=self.generator)]
                repeat_indices = map(_choice, repeat_indices)
            indices = torch.cat([indices, *repeat_indices], 0)

        assert indices.shape[0] == self.n_samples
        return iter(indices.tolist())
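Not part of the commit: a toy example of the sampler above on two small in-memory datasets, to make the per-subset sampling behaviour concrete. The dataset sizes and seed are arbitrary.

# Illustrative usage sketch only.
import torch
from torch.utils.data import ConcatDataset, DataLoader, TensorDataset
from src.datasets.sampler import RandomConcatSampler

ds_a = TensorDataset(torch.arange(10).unsqueeze(1))    # 10 samples
ds_b = TensorDataset(torch.arange(100).unsqueeze(1))   # 100 samples
concat = ConcatDataset([ds_a, ds_b])

# 5 samples per subset per epoch, with replacement, shuffled across subsets
sampler = RandomConcatSampler(concat, n_samples_per_subset=5,
                              subset_replacement=True, shuffle=True, repeat=1, seed=66)
loader = DataLoader(concat, sampler=sampler, batch_size=2)
print(len(sampler))  # 10 = 2 subsets * 5 samples each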
third_party/XoFTR/src/datasets/scannet.py
ADDED
@@ -0,0 +1,114 @@
from os import path as osp
from typing import Dict
from unicodedata import name

import numpy as np
import torch
import torch.utils as utils
from numpy.linalg import inv
from src.utils.dataset import (
    read_scannet_gray,
    read_scannet_depth,
    read_scannet_pose,
    read_scannet_intrinsic
)


class ScanNetDataset(utils.data.Dataset):
    def __init__(self,
                 root_dir,
                 npz_path,
                 intrinsic_path,
                 mode='train',
                 min_overlap_score=0.4,
                 augment_fn=None,
                 pose_dir=None,
                 **kwargs):
        """Manage one scene of the ScanNet dataset.
        Args:
            root_dir (str): ScanNet root directory that contains scene folders.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            intrinsic_path (str): path to depth-camera intrinsic file.
            mode (str): options are ['train', 'val', 'test'].
            augment_fn (callable, optional): augments images with pre-defined visual effects.
            pose_dir (str): ScanNet root directory that contains all poses.
                (we use a separate (optional) pose_dir since we store images and poses separately.)
        """
        super().__init__()
        self.root_dir = root_dir
        self.pose_dir = pose_dir if pose_dir is not None else root_dir
        self.mode = mode

        # prepare data_names, intrinsics and extrinsics (T)
        with np.load(npz_path) as data:
            self.data_names = data['name']
            if 'score' in data.keys() and mode not in ['val', 'test']:
                kept_mask = data['score'] > min_overlap_score
                self.data_names = self.data_names[kept_mask]
        self.intrinsics = dict(np.load(intrinsic_path))

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None

    def __len__(self):
        return len(self.data_names)

    def _read_abs_pose(self, scene_name, name):
        pth = osp.join(self.pose_dir,
                       scene_name,
                       'pose', f'{name}.txt')
        return read_scannet_pose(pth)

    def _compute_rel_pose(self, scene_name, name0, name1):
        pose0 = self._read_abs_pose(scene_name, name0)
        pose1 = self._read_abs_pose(scene_name, name1)

        return np.matmul(pose1, inv(pose0))  # (4, 4)

    def __getitem__(self, idx):
        data_name = self.data_names[idx]
        scene_name, scene_sub_name, stem_name_0, stem_name_1 = data_name
        scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}'

        # read the grayscale image which will be resized to (1, 480, 640)
        img_name0 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_0}.jpg')
        img_name1 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_1}.jpg')

        # TODO: Support augmentation & handle seeds for each worker correctly.
        image0 = read_scannet_gray(img_name0, resize=(640, 480), augment_fn=None)
        # augment_fn=np.random.choice([self.augment_fn, None], p=[0.5, 0.5]))
        image1 = read_scannet_gray(img_name1, resize=(640, 480), augment_fn=None)
        # augment_fn=np.random.choice([self.augment_fn, None], p=[0.5, 0.5]))

        # read the depthmap which is stored as (480, 640)
        if self.mode in ['train', 'val']:
            depth0 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_0}.png'))
            depth1 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_1}.png'))
        else:
            depth0 = depth1 = torch.tensor([])

        # read the intrinsic of the depthmap
        K_0 = K_1 = torch.tensor(self.intrinsics[scene_name].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T_0to1 = torch.tensor(self._compute_rel_pose(scene_name, stem_name_0, stem_name_1),
                              dtype=torch.float32)
        T_1to0 = T_0to1.inverse()

        data = {
            'image0': image0,  # (1, h, w)
            'depth0': depth0,  # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,  # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'dataset_name': 'ScanNet',
            'scene_id': scene_name,
            'pair_id': idx,
            'pair_names': (osp.join(scene_name, 'color', f'{stem_name_0}.jpg'),
                           osp.join(scene_name, 'color', f'{stem_name_1}.jpg'))
        }

        return data
third_party/XoFTR/src/datasets/vistir.py
ADDED
@@ -0,0 +1,109 @@
import os.path as osp
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from loguru import logger

from src.utils.dataset import read_vistir_gray


class VisTirDataset(Dataset):
    def __init__(self,
                 root_dir,
                 npz_path,
                 mode='val',
                 img_resize=None,
                 df=None,
                 img_padding=False,
                 **kwargs):
        """
        Manage one scene (npz_path) of the VisTir dataset.

        Args:
            root_dir (str): VisTIR root directory.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            mode (str): options are ['val', 'test']
            img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
            df (int, optional): image size division factor. NOTE: this will change the final image size after img_resize.
            img_padding (bool): If set to 'True', zero-pad the image to squared size.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode
        self.scene_id = npz_path.split('.')[0]

        # prepare scene_info and pair_info
        self.scene_info = dict(np.load(npz_path, allow_pickle=True))
        self.pair_infos = self.scene_info['pair_infos'].copy()
        del self.scene_info['pair_infos']

        # parameters for image resizing, padding
        self.img_resize = img_resize
        self.df = df
        self.img_padding = img_padding

        # for training XoFTR
        self.coarse_scale = kwargs.get('coarse_scale', 0.125)  # kwargs is a plain dict

    def __len__(self):
        return len(self.pair_infos)

    def __getitem__(self, idx):
        (idx0, idx1) = self.pair_infos[idx]

        img_name0 = osp.join(self.root_dir, self.scene_info['image_paths'][idx0][0])
        img_name1 = osp.join(self.root_dir, self.scene_info['image_paths'][idx1][1])

        # read intrinsics of original size
        K_0 = np.array(self.scene_info['intrinsics'][idx0][0], dtype=float).reshape(3, 3)
        K_1 = np.array(self.scene_info['intrinsics'][idx1][1], dtype=float).reshape(3, 3)

        # read distortion coefficients
        dist0 = np.array(self.scene_info['distortion_coefs'][idx0][0], dtype=float)
        dist1 = np.array(self.scene_info['distortion_coefs'][idx1][1], dtype=float)

        # read grayscale undistorted image and mask. (1, h, w) and (h, w)
        image0, mask0, scale0, K_0 = read_vistir_gray(
            img_name0, K_0, dist0, self.img_resize, self.df, self.img_padding, augment_fn=None)
        image1, mask1, scale1, K_1 = read_vistir_gray(
            img_name1, K_1, dist1, self.img_resize, self.df, self.img_padding, augment_fn=None)

        # to tensor
        K_0 = torch.tensor(K_0.copy(), dtype=torch.float).reshape(3, 3)
        K_1 = torch.tensor(K_1.copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T0 = self.scene_info['poses'][idx0]
        T1 = self.scene_info['poses'][idx1]
        T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[:4, :4]  # (4, 4)
        T_1to0 = T_0to1.inverse()

        data = {
            'image0': image0,  # (1, h, w)
            'image1': image1,
            'T_0to1': T_0to1,  # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'dist0': dist0,
            'dist1': dist1,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            'dataset_name': 'VisTir',
            'scene_id': self.scene_id,
            'pair_id': idx,
            'pair_names': (self.scene_info['image_paths'][idx0][0], self.scene_info['image_paths'][idx1][1]),
        }

        # for XoFTR training
        if mask0 is not None:  # img_padding is True
            if self.coarse_scale:
                [ts_mask_0, ts_mask_1] = F.interpolate(torch.stack([mask0, mask1], dim=0)[None].float(),
                                                       scale_factor=self.coarse_scale,
                                                       mode='nearest',
                                                       recompute_scale_factor=False)[0].bool()
                data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1})

        return data
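Not part of the commit: a rough sketch of iterating over one VisTIR scene for evaluation. The root directory and index path below are placeholders, not paths taken from this diff.

# Illustrative usage sketch; paths are placeholders.
from torch.utils.data import DataLoader
from src.datasets.vistir import VisTirDataset

dataset = VisTirDataset(
    root_dir="data/vistir",                   # placeholder dataset root
    npz_path="data/vistir/index/scene_0.npz",  # placeholder per-scene index
    mode="test", img_resize=640, df=8, img_padding=False)
loader = DataLoader(dataset, batch_size=1, shuffle=False)
pair = next(iter(loader))  # undistorted grayscale pair plus K0/K1 and T_0to1 for pose evaluation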
third_party/XoFTR/src/lightning/data.py
ADDED
@@ -0,0 +1,346 @@
import os
import math
from collections import abc
from loguru import logger
from torch.utils.data.dataset import Dataset
from tqdm import tqdm
from os import path as osp
from pathlib import Path
from joblib import Parallel, delayed

import pytorch_lightning as pl
from torch import distributed as dist
from torch.utils.data import (
    Dataset,
    DataLoader,
    ConcatDataset,
    DistributedSampler,
    RandomSampler,
    dataloader
)

from src.utils.augment import build_augmentor
from src.utils.dataloader import get_local_split
from src.utils.misc import tqdm_joblib
from src.utils import comm
from src.datasets.megadepth import MegaDepthDataset
from src.datasets.vistir import VisTirDataset
from src.datasets.scannet import ScanNetDataset
from src.datasets.sampler import RandomConcatSampler


class MultiSceneDataModule(pl.LightningDataModule):
    """
    For distributed training, each training process is assigned
    only a part of the training scenes to reduce memory overhead.
    """
    def __init__(self, args, config):
        super().__init__()

        # 1. data config
        # Train and Val should come from the same data source
        self.train_data_source = config.DATASET.TRAIN_DATA_SOURCE
        self.val_data_source = config.DATASET.VAL_DATA_SOURCE
        self.test_data_source = config.DATASET.TEST_DATA_SOURCE
        # training and validating
        self.train_data_root = config.DATASET.TRAIN_DATA_ROOT
        self.train_pose_root = config.DATASET.TRAIN_POSE_ROOT  # (optional)
        self.train_npz_root = config.DATASET.TRAIN_NPZ_ROOT
        self.train_list_path = config.DATASET.TRAIN_LIST_PATH
        self.train_intrinsic_path = config.DATASET.TRAIN_INTRINSIC_PATH
        self.val_data_root = config.DATASET.VAL_DATA_ROOT
        self.val_pose_root = config.DATASET.VAL_POSE_ROOT  # (optional)
        self.val_npz_root = config.DATASET.VAL_NPZ_ROOT
        self.val_list_path = config.DATASET.VAL_LIST_PATH
        self.val_intrinsic_path = config.DATASET.VAL_INTRINSIC_PATH
        # testing
        self.test_data_root = config.DATASET.TEST_DATA_ROOT
        self.test_pose_root = config.DATASET.TEST_POSE_ROOT  # (optional)
        self.test_npz_root = config.DATASET.TEST_NPZ_ROOT
        self.test_list_path = config.DATASET.TEST_LIST_PATH
        self.test_intrinsic_path = config.DATASET.TEST_INTRINSIC_PATH

        # 2. dataset config
        # general options
        self.min_overlap_score_test = config.DATASET.MIN_OVERLAP_SCORE_TEST  # 0.4, omit data with overlap_score < min_overlap_score
        self.min_overlap_score_train = config.DATASET.MIN_OVERLAP_SCORE_TRAIN
        self.augment_fn = build_augmentor(config.DATASET.AUGMENTATION_TYPE)  # None, options: [None, 'dark', 'mobile']

        # MegaDepth options
        self.mgdpt_img_resize = config.DATASET.MGDPT_IMG_RESIZE  # 840
        self.mgdpt_img_pad = config.DATASET.MGDPT_IMG_PAD  # True
        self.mgdpt_depth_pad = config.DATASET.MGDPT_DEPTH_PAD  # True
        self.mgdpt_df = config.DATASET.MGDPT_DF  # 8
        self.coarse_scale = 1 / config.XOFTR.RESOLUTION[0]  # 0.125, for training xoftr.

        # VisTir options
        self.vistir_img_resize = config.DATASET.VISTIR_IMG_RESIZE
        self.vistir_img_pad = config.DATASET.VISTIR_IMG_PAD
        self.vistir_df = config.DATASET.VISTIR_DF  # 8

        # 3. loader parameters
        self.train_loader_params = {
            'batch_size': args.batch_size,
            'num_workers': args.num_workers,
            'pin_memory': getattr(args, 'pin_memory', True)
        }
        self.val_loader_params = {
            'batch_size': 1,
            'shuffle': False,
            'num_workers': args.num_workers,
            'pin_memory': getattr(args, 'pin_memory', True)
        }
        self.test_loader_params = {
            'batch_size': 1,
            'shuffle': False,
            'num_workers': args.num_workers,
            'pin_memory': True
        }

        # 4. sampler
        self.data_sampler = config.TRAINER.DATA_SAMPLER
        self.n_samples_per_subset = config.TRAINER.N_SAMPLES_PER_SUBSET
        self.subset_replacement = config.TRAINER.SB_SUBSET_SAMPLE_REPLACEMENT
        self.shuffle = config.TRAINER.SB_SUBSET_SHUFFLE
        self.repeat = config.TRAINER.SB_REPEAT

        # (optional) RandomSampler for debugging

        # misc configurations
        self.parallel_load_data = getattr(args, 'parallel_load_data', False)
        self.seed = config.TRAINER.SEED  # 66

    def setup(self, stage=None):
        """
        Setup train / val / test dataset. This method will be called by PL automatically.
        Args:
            stage (str): 'fit' in training phase, and 'test' in testing phase.
        """

        assert stage in ['fit', 'test'], "stage must be either fit or test"

        try:
            self.world_size = dist.get_world_size()
            self.rank = dist.get_rank()
            logger.info(f"[rank:{self.rank}] world_size: {self.world_size}")
        except AssertionError as ae:
            self.world_size = 1
            self.rank = 0
            logger.warning(str(ae) + " (set world_size=1 and rank=0)")

        if stage == 'fit':
            self.train_dataset = self._setup_dataset(
                self.train_data_root,
                self.train_npz_root,
                self.train_list_path,
                self.train_intrinsic_path,
                mode='train',
                min_overlap_score=self.min_overlap_score_train,
                pose_dir=self.train_pose_root)
            # setup multiple (optional) validation subsets
            if isinstance(self.val_list_path, (list, tuple)):
                self.val_dataset = []
                if not isinstance(self.val_npz_root, (list, tuple)):
                    self.val_npz_root = [self.val_npz_root for _ in range(len(self.val_list_path))]
                for npz_list, npz_root in zip(self.val_list_path, self.val_npz_root):
                    self.val_dataset.append(self._setup_dataset(
                        self.val_data_root,
                        npz_root,
                        npz_list,
                        self.val_intrinsic_path,
                        mode='val',
                        min_overlap_score=self.min_overlap_score_test,
                        pose_dir=self.val_pose_root))
            else:
                self.val_dataset = self._setup_dataset(
                    self.val_data_root,
                    self.val_npz_root,
                    self.val_list_path,
                    self.val_intrinsic_path,
                    mode='val',
                    min_overlap_score=self.min_overlap_score_test,
                    pose_dir=self.val_pose_root)
            logger.info(f'[rank:{self.rank}] Train & Val Dataset loaded!')
        else:  # stage == 'test'
            self.test_dataset = self._setup_dataset(
                self.test_data_root,
                self.test_npz_root,
                self.test_list_path,
                self.test_intrinsic_path,
                mode='test',
                min_overlap_score=self.min_overlap_score_test,
                pose_dir=self.test_pose_root)
            logger.info(f'[rank:{self.rank}]: Test Dataset loaded!')

    def _setup_dataset(self,
                       data_root,
                       split_npz_root,
                       scene_list_path,
                       intri_path,
                       mode='train',
                       min_overlap_score=0.,
                       pose_dir=None):
        """ Setup train / val / test set """
        with open(scene_list_path, 'r') as f:
            npz_names = [name.split()[0] for name in f.readlines()]

        if mode == 'train':
            local_npz_names = get_local_split(npz_names, self.world_size, self.rank, self.seed)
        else:
            local_npz_names = npz_names
        logger.info(f'[rank {self.rank}]: {len(local_npz_names)} scene(s) assigned.')

        dataset_builder = self._build_concat_dataset_parallel \
            if self.parallel_load_data \
            else self._build_concat_dataset
        return dataset_builder(data_root, local_npz_names, split_npz_root, intri_path,
                               mode=mode, min_overlap_score=min_overlap_score, pose_dir=pose_dir)

    def _build_concat_dataset(
        self,
        data_root,
        npz_names,
        npz_dir,
        intrinsic_path,
        mode,
        min_overlap_score=0.,
        pose_dir=None
    ):
        datasets = []
        augment_fn = self.augment_fn
        if mode == 'train':
            data_source = self.train_data_source
        elif mode == 'val':
            data_source = self.val_data_source
        else:
            data_source = self.test_data_source
        if str(data_source).lower() == 'megadepth':
            npz_names = [f'{n}.npz' for n in npz_names]
        for npz_name in tqdm(npz_names,
                             desc=f'[rank:{self.rank}] loading {mode} datasets',
                             disable=int(self.rank) != 0):
            # `ScanNetDataset`/`MegaDepthDataset` load all data from npz_path when initialized, which might take time.
            npz_path = osp.join(npz_dir, npz_name)
            if data_source == 'ScanNet':
                datasets.append(
                    ScanNetDataset(data_root,
                                   npz_path,
                                   intrinsic_path,
                                   mode=mode,
                                   min_overlap_score=min_overlap_score,
                                   augment_fn=augment_fn,
                                   pose_dir=pose_dir))
            elif data_source == 'MegaDepth':
                datasets.append(
                    MegaDepthDataset(data_root,
                                     npz_path,
                                     mode=mode,
                                     min_overlap_score=min_overlap_score,
                                     img_resize=self.mgdpt_img_resize,
                                     df=self.mgdpt_df,
                                     img_padding=self.mgdpt_img_pad,
                                     depth_padding=self.mgdpt_depth_pad,
                                     augment_fn=augment_fn,
                                     coarse_scale=self.coarse_scale))
            elif data_source == 'VisTir':
                datasets.append(
                    VisTirDataset(data_root,
                                  npz_path,
                                  mode=mode,
                                  img_resize=self.vistir_img_resize,
                                  df=self.vistir_df,
                                  img_padding=self.vistir_img_pad,
                                  coarse_scale=self.coarse_scale))
            else:
                raise NotImplementedError()
        return ConcatDataset(datasets)

    def _build_concat_dataset_parallel(
        self,
        data_root,
        npz_names,
        npz_dir,
        intrinsic_path,
        mode,
        min_overlap_score=0.,
        pose_dir=None,
    ):
        augment_fn = self.augment_fn
        if mode == 'train':
            data_source = self.train_data_source
        elif mode == 'val':
            data_source = self.val_data_source
        else:
            data_source = self.test_data_source
        if str(data_source).lower() == 'megadepth':
            npz_names = [f'{n}.npz' for n in npz_names]
        with tqdm_joblib(tqdm(desc=f'[rank:{self.rank}] loading {mode} datasets',
                              total=len(npz_names), disable=int(self.rank) != 0)):
            if data_source == 'ScanNet':
                datasets = Parallel(n_jobs=math.floor(len(os.sched_getaffinity(0)) * 0.9 / comm.get_local_size()))(
                    delayed(lambda x: _build_dataset(
                        ScanNetDataset,
                        data_root,
                        osp.join(npz_dir, x),
                        intrinsic_path,
                        mode=mode,
                        min_overlap_score=min_overlap_score,
                        augment_fn=augment_fn,
                        pose_dir=pose_dir))(name)
                    for name in npz_names)
            elif data_source == 'MegaDepth':
                # TODO: _pickle.PicklingError: Could not pickle the task to send it to the workers.
                raise NotImplementedError()
                datasets = Parallel(n_jobs=math.floor(len(os.sched_getaffinity(0)) * 0.9 / comm.get_local_size()))(
                    delayed(lambda x: _build_dataset(
                        MegaDepthDataset,
                        data_root,
                        osp.join(npz_dir, x),
                        mode=mode,
                        min_overlap_score=min_overlap_score,
                        img_resize=self.mgdpt_img_resize,
                        df=self.mgdpt_df,
                        img_padding=self.mgdpt_img_pad,
                        depth_padding=self.mgdpt_depth_pad,
                        augment_fn=augment_fn,
                        coarse_scale=self.coarse_scale))(name)
                    for name in npz_names)
            else:
                raise ValueError(f'Unknown dataset: {data_source}')
        return ConcatDataset(datasets)

    def train_dataloader(self):
        """ Build training dataloader for ScanNet / MegaDepth. """
        assert self.data_sampler in ['scene_balance']
        logger.info(f'[rank:{self.rank}/{self.world_size}]: Train Sampler and DataLoader re-init (should not re-init between epochs!).')
        if self.data_sampler == 'scene_balance':
            sampler = RandomConcatSampler(self.train_dataset,
                                          self.n_samples_per_subset,
                                          self.subset_replacement,
                                          self.shuffle, self.repeat, self.seed)
        else:
            sampler = None
        dataloader = DataLoader(self.train_dataset, sampler=sampler, **self.train_loader_params)
        return dataloader

    def val_dataloader(self):
        """ Build validation dataloader for ScanNet / MegaDepth. """
        logger.info(f'[rank:{self.rank}/{self.world_size}]: Val Sampler and DataLoader re-init.')
        if not isinstance(self.val_dataset, abc.Sequence):
            sampler = DistributedSampler(self.val_dataset, shuffle=False)
            return DataLoader(self.val_dataset, sampler=sampler, **self.val_loader_params)
        else:
            dataloaders = []
            for dataset in self.val_dataset:
                sampler = DistributedSampler(dataset, shuffle=False)
                dataloaders.append(DataLoader(dataset, sampler=sampler, **self.val_loader_params))
            return dataloaders

    def test_dataloader(self, *args, **kwargs):
        logger.info(f'[rank:{self.rank}/{self.world_size}]: Test Sampler and DataLoader re-init.')
        sampler = DistributedSampler(self.test_dataset, shuffle=False)
        return DataLoader(self.test_dataset, sampler=sampler, **self.test_loader_params)


def _build_dataset(dataset: Dataset, *args, **kwargs):
    return dataset(*args, **kwargs)
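Not part of the commit: a sketch of how a Lightning datamodule like the one above is typically driven. The `args` namespace below only mimics the fields the module reads; it is not copied from this repository's CLI.

# Illustrative usage sketch; the args fields are assumptions, not the repo's argparse options.
from types import SimpleNamespace
import pytorch_lightning as pl
from src.config.default import get_cfg_defaults
from src.lightning.data import MultiSceneDataModule

config = get_cfg_defaults()  # dataset roots/list paths would still need to be filled in
args = SimpleNamespace(batch_size=2, num_workers=4, pin_memory=True, parallel_load_data=False)

dm = MultiSceneDataModule(args, config)
# trainer.fit(model, datamodule=dm) would trigger dm.setup('fit'), building the
# scene-balanced training loader and one DistributedSampler-backed loader per validation split.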
third_party/XoFTR/src/lightning/data_pretrain.py
ADDED
@@ -0,0 +1,125 @@
from collections import abc
from loguru import logger

import pytorch_lightning as pl
from torch import distributed as dist
from torch.utils.data import (
    DataLoader,
    ConcatDataset,
    DistributedSampler
)

from src.datasets.pretrain_dataset import PretrainDataset


class PretrainDataModule(pl.LightningDataModule):
    """
    For distributed training, each training process is assigned
    only a part of the training scenes to reduce memory overhead.
    """
    def __init__(self, args, config):
        super().__init__()

        # 1. data config
        # Train and Val should come from the same data source
        self.train_data_source = config.DATASET.TRAIN_DATA_SOURCE
        self.val_data_source = config.DATASET.VAL_DATA_SOURCE
        # training and validating
        self.train_data_root = config.DATASET.TRAIN_DATA_ROOT
        self.val_data_root = config.DATASET.VAL_DATA_ROOT

        # 2. dataset config

        # dataset options
        self.pretrain_img_resize = config.DATASET.PRETRAIN_IMG_RESIZE  # 840
        self.pretrain_img_pad = config.DATASET.PRETRAIN_IMG_PAD  # True
        self.pretrain_df = config.DATASET.PRETRAIN_DF  # 8
        self.coarse_scale = 1 / config.XOFTR.RESOLUTION[0]  # 0.125. for training xoftr.
        self.frame_gap = config.DATASET.PRETRAIN_FRAME_GAP

        # 3. loader parameters
        self.train_loader_params = {
            'batch_size': args.batch_size,
            'num_workers': args.num_workers,
            'pin_memory': getattr(args, 'pin_memory', True)
        }
        self.val_loader_params = {
            'batch_size': 1,
            'shuffle': False,
            'num_workers': args.num_workers,
            'pin_memory': getattr(args, 'pin_memory', True)
        }

    def setup(self, stage=None):
        """
        Setup train / val / test dataset. This method will be called by PL automatically.
        Args:
            stage (str): 'fit' in training phase, and 'test' in testing phase.
        """

        assert stage in ['fit', 'test'], "stage must be either fit or test"

        try:
            self.world_size = dist.get_world_size()
            self.rank = dist.get_rank()
            logger.info(f"[rank:{self.rank}] world_size: {self.world_size}")
        except AssertionError as ae:
            self.world_size = 1
            self.rank = 0
            logger.warning(str(ae) + " (set world_size=1 and rank=0)")

        if stage == 'fit':
            self.train_dataset = self._setup_dataset(
                self.train_data_root,
                mode='train')
            # setup multiple (optional) validation subsets
            self.val_dataset = []
            self.val_dataset.append(self._setup_dataset(
                self.val_data_root,
                mode='val'))
            logger.info(f'[rank:{self.rank}] Train & Val Dataset loaded!')
        else:  # stage == 'test'
            raise ValueError("only 'fit' implemented")

    def _setup_dataset(self,
                       data_root,
                       mode='train'):
        """ Setup train / val / test set"""

        dataset_builder = self._build_concat_dataset
        return dataset_builder(data_root, mode=mode)

    def _build_concat_dataset(
        self,
        data_root,
        mode
    ):
        datasets = []

        datasets.append(
            PretrainDataset(data_root,
                            mode=mode,
                            img_resize=self.pretrain_img_resize,
                            df=self.pretrain_df,
                            img_padding=self.pretrain_img_pad,
                            coarse_scale=self.coarse_scale,
                            frame_gap=self.frame_gap))

        return ConcatDataset(datasets)

    def train_dataloader(self):
        """ Build training dataloader for KAIST dataset. """
        sampler = DistributedSampler(self.train_dataset, shuffle=True)
        dataloader = DataLoader(self.train_dataset, sampler=sampler, **self.train_loader_params)
        return dataloader

    def val_dataloader(self):
        """ Build validation dataloader for the KAIST dataset. """
        if not isinstance(self.val_dataset, abc.Sequence):
            # build a sampler for the single validation set as well
            sampler = DistributedSampler(self.val_dataset, shuffle=False)
            return DataLoader(self.val_dataset, sampler=sampler, **self.val_loader_params)
        else:
            dataloaders = []
            for dataset in self.val_dataset:
                sampler = DistributedSampler(dataset, shuffle=False)
                dataloaders.append(DataLoader(dataset, sampler=sampler, **self.val_loader_params))
            return dataloaders
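For reference, a minimal sketch (not part of the diff) of how this datamodule is typically constructed. It assumes `get_cfg_defaults()` from `src/config/default.py`, which is added elsewhere in this commit, and pretraining frames laid out under `config.DATASET.TRAIN_DATA_ROOT`; the argument values are illustrative.

# Minimal sketch, assuming src.config.default.get_cfg_defaults and prepared data.
from argparse import Namespace
from src.config.default import get_cfg_defaults
from src.lightning.data_pretrain import PretrainDataModule

args = Namespace(batch_size=2, num_workers=4, pin_memory=True)  # illustrative values
config = get_cfg_defaults()

dm = PretrainDataModule(args, config)
dm.setup(stage='fit')  # without torch.distributed initialized, it falls back to world_size=1 / rank=0
# dm.train_dataloader() / dm.val_dataloader() build DistributedSampler-backed loaders,
# so they expect an initialized process group (PyTorch Lightning sets this up in DDP runs).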
third_party/XoFTR/src/lightning/lightning_xoftr.py
ADDED
@@ -0,0 +1,334 @@

from collections import defaultdict
import pprint
from loguru import logger
from pathlib import Path

import torch
import numpy as np
import pytorch_lightning as pl
from matplotlib import pyplot as plt
plt.switch_backend('agg')

from src.xoftr import XoFTR
from src.xoftr.utils.supervision import compute_supervision_coarse, compute_supervision_fine
from src.losses.xoftr_loss import XoFTRLoss
from src.optimizers import build_optimizer, build_scheduler
from src.utils.metrics import (
    compute_symmetrical_epipolar_errors,
    compute_pose_errors,
    aggregate_metrics
)
from src.utils.plotting import make_matching_figures
from src.utils.comm import gather, all_gather
from src.utils.misc import lower_config, flattenList
from src.utils.profiler import PassThroughProfiler


class PL_XoFTR(pl.LightningModule):
    def __init__(self, config, pretrained_ckpt=None, profiler=None, dump_dir=None):
        """
        TODO:
            - use the new version of PL logging API.
        """
        super().__init__()
        # Misc
        self.config = config  # full config
        _config = lower_config(self.config)
        self.xoftr_cfg = lower_config(_config['xoftr'])
        self.profiler = profiler or PassThroughProfiler()
        self.n_vals_plot = max(config.TRAINER.N_VAL_PAIRS_TO_PLOT // config.TRAINER.WORLD_SIZE, 1)

        # Matcher: XoFTR
        self.matcher = XoFTR(config=_config['xoftr'])
        self.loss = XoFTRLoss(_config)

        # Pretrained weights
        if pretrained_ckpt:
            state_dict = torch.load(pretrained_ckpt, map_location='cpu')['state_dict']
            self.matcher.load_state_dict(state_dict, strict=False)
            logger.info(f"Load \'{pretrained_ckpt}\' as pretrained checkpoint")
            for name, param in self.matcher.named_parameters():
                if name in state_dict.keys():
                    print("in ckpt: ", name)
                else:
                    print("out ckpt: ", name)

        # Testing
        self.dump_dir = dump_dir

    def configure_optimizers(self):
        # FIXME: The scheduler did not work properly when `--resume_from_checkpoint`
        optimizer = build_optimizer(self, self.config)
        scheduler = build_scheduler(self.config, optimizer)
        return [optimizer], [scheduler]

    def optimizer_step(
            self, epoch, batch_idx, optimizer, optimizer_idx,
            optimizer_closure, on_tpu, using_native_amp, using_lbfgs):
        # learning rate warm up
        warmup_step = self.config.TRAINER.WARMUP_STEP
        if self.trainer.global_step < warmup_step:
            if self.config.TRAINER.WARMUP_TYPE == 'linear':
                base_lr = self.config.TRAINER.WARMUP_RATIO * self.config.TRAINER.TRUE_LR
                lr = base_lr + \
                    (self.trainer.global_step / self.config.TRAINER.WARMUP_STEP) * \
                    abs(self.config.TRAINER.TRUE_LR - base_lr)
                for pg in optimizer.param_groups:
                    pg['lr'] = lr
            elif self.config.TRAINER.WARMUP_TYPE == 'constant':
                pass
            else:
                raise ValueError(f'Unknown lr warm-up strategy: {self.config.TRAINER.WARMUP_TYPE}')

        # update params
        optimizer.step(closure=optimizer_closure)
        optimizer.zero_grad()

    def _trainval_inference(self, batch):
        with self.profiler.profile("Compute coarse supervision"):
            compute_supervision_coarse(batch, self.config)

        with self.profiler.profile("XoFTR"):
            self.matcher(batch)

        with self.profiler.profile("Compute fine supervision"):
            compute_supervision_fine(batch, self.config)

        with self.profiler.profile("Compute losses"):
            self.loss(batch)

    def _compute_metrics(self, batch):
        with self.profiler.profile("Compute metrics"):
            compute_symmetrical_epipolar_errors(batch)  # compute epi_errs for each match
            compute_pose_errors(batch, self.config)  # compute R_errs, t_errs, pose_errs for each pair

            rel_pair_names = list(zip(*batch['pair_names']))
            bs = batch['image0'].size(0)
            metrics = {
                # to filter duplicate pairs caused by DistributedSampler
                'identifiers': ['#'.join(rel_pair_names[b]) for b in range(bs)],
                'epi_errs': [batch['epi_errs'][batch['m_bids'] == b].cpu().numpy() for b in range(bs)],
                'R_errs': batch['R_errs'],
                't_errs': batch['t_errs'],
                'inliers': batch['inliers']}
            if self.config.DATASET.VAL_DATA_SOURCE == "VisTir":
                metrics.update({'scene_id': batch['scene_id']})
            ret_dict = {'metrics': metrics}
        return ret_dict, rel_pair_names

    def training_step(self, batch, batch_idx):
        self._trainval_inference(batch)

        # logging
        if self.trainer.global_rank == 0 and self.global_step % self.trainer.log_every_n_steps == 0:
            # scalars
            for k, v in batch['loss_scalars'].items():
                self.logger[0].experiment.add_scalar(f'train/{k}', v, self.global_step)
                if self.config.TRAINER.USE_WANDB:
                    self.logger[1].log_metrics({f'train/{k}': v}, self.global_step)

            # figures
            if self.config.TRAINER.ENABLE_PLOTTING:
                compute_symmetrical_epipolar_errors(batch)  # compute epi_errs for each match
                figures = make_matching_figures(batch, self.config, self.config.TRAINER.PLOT_MODE)
                for k, v in figures.items():
                    self.logger[0].experiment.add_figure(f'train_match/{k}', v, self.global_step)

        return {'loss': batch['loss']}

    def training_epoch_end(self, outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        if self.trainer.global_rank == 0:
            self.logger[0].experiment.add_scalar(
                'train/avg_loss_on_epoch', avg_loss,
                global_step=self.current_epoch)
            if self.config.TRAINER.USE_WANDB:
                self.logger[1].log_metrics(
                    {'train/avg_loss_on_epoch': avg_loss},
                    self.current_epoch)

    def validation_step(self, batch, batch_idx):
        # no loss calculation for VisTir during val
        if self.config.DATASET.VAL_DATA_SOURCE == "VisTir":
            with self.profiler.profile("XoFTR"):
                self.matcher(batch)
        else:
            self._trainval_inference(batch)

        ret_dict, _ = self._compute_metrics(batch)

        val_plot_interval = max(self.trainer.num_val_batches[0] // self.n_vals_plot, 1)
        figures = {self.config.TRAINER.PLOT_MODE: []}
        if batch_idx % val_plot_interval == 0:
            figures = make_matching_figures(batch, self.config, mode=self.config.TRAINER.PLOT_MODE, ret_dict=ret_dict)
        if self.config.DATASET.VAL_DATA_SOURCE == "VisTir":
            return {
                **ret_dict,
                'figures': figures,
            }
        else:
            return {
                **ret_dict,
                'loss_scalars': batch['loss_scalars'],
                'figures': figures,
            }

    def validation_epoch_end(self, outputs):
        # handle multiple validation sets
        multi_outputs = [outputs] if not isinstance(outputs[0], (list, tuple)) else outputs
        multi_val_metrics = defaultdict(list)

        for valset_idx, outputs in enumerate(multi_outputs):
            # since pl performs sanity_check at the very beginning of the training
            cur_epoch = self.trainer.current_epoch
            if not self.trainer.resume_from_checkpoint and self.trainer.running_sanity_check:
                cur_epoch = -1

            if self.config.DATASET.VAL_DATA_SOURCE == "VisTir":
                metrics_per_scene = {}
                for o in outputs:
                    if not o['metrics']['scene_id'][0] in metrics_per_scene.keys():
                        metrics_per_scene[o['metrics']['scene_id'][0]] = []
                    metrics_per_scene[o['metrics']['scene_id'][0]].append(o['metrics'])

                aucs_per_scene = {}
                for scene_id in metrics_per_scene.keys():
                    # 2. val metrics: dict of list, numpy
                    _metrics = metrics_per_scene[scene_id]
                    metrics = {k: flattenList(all_gather(flattenList([_me[k] for _me in _metrics]))) for k in _metrics[0]}
                    # NOTE: all ranks need to `aggregate_metrics`, but only log at rank-0
                    val_metrics = aggregate_metrics(metrics, self.config.TRAINER.EPI_ERR_THR)
                    aucs_per_scene[scene_id] = val_metrics

                # average the metrics of scenes
                # since the number of images in each scene is different
                val_metrics_4tb = {}
                for thr in [5, 10, 20]:
                    temp = []
                    for scene_id in metrics_per_scene.keys():
                        temp.append(aucs_per_scene[scene_id][f'auc@{thr}'])
                    val_metrics_4tb[f'auc@{thr}'] = float(np.array(temp, dtype=float).mean())
                temp = []
                for scene_id in metrics_per_scene.keys():
                    temp.append(aucs_per_scene[scene_id][f'prec@{self.config.TRAINER.EPI_ERR_THR:.0e}'])
                val_metrics_4tb[f'prec@{self.config.TRAINER.EPI_ERR_THR:.0e}'] = float(np.array(temp, dtype=float).mean())
            else:
                # 1. loss_scalars: dict of list, on cpu
                _loss_scalars = [o['loss_scalars'] for o in outputs]
                loss_scalars = {k: flattenList(all_gather([_ls[k] for _ls in _loss_scalars])) for k in _loss_scalars[0]}

                # 2. val metrics: dict of list, numpy
                _metrics = [o['metrics'] for o in outputs]
                metrics = {k: flattenList(all_gather(flattenList([_me[k] for _me in _metrics]))) for k in _metrics[0]}
                # NOTE: all ranks need to `aggregate_metrics`, but only log at rank-0
                val_metrics_4tb = aggregate_metrics(metrics, self.config.TRAINER.EPI_ERR_THR)

            for thr in [5, 10, 20]:
                multi_val_metrics[f'auc@{thr}'].append(val_metrics_4tb[f'auc@{thr}'])

            # 3. figures
            _figures = [o['figures'] for o in outputs]
            figures = {k: flattenList(gather(flattenList([_me[k] for _me in _figures]))) for k in _figures[0]}

            # tensorboard records only on rank 0
            if self.trainer.global_rank == 0:
                if self.config.DATASET.VAL_DATA_SOURCE != "VisTir":
                    for k, v in loss_scalars.items():
                        mean_v = torch.stack(v).mean()
                        self.logger.experiment.add_scalar(f'val_{valset_idx}/avg_{k}', mean_v, global_step=cur_epoch)

                for k, v in val_metrics_4tb.items():
                    self.logger[0].experiment.add_scalar(f"metrics_{valset_idx}/{k}", v, global_step=cur_epoch)
                    if self.config.TRAINER.USE_WANDB:
                        self.logger[1].log_metrics({f"metrics_{valset_idx}/{k}": v}, cur_epoch)

                for k, v in figures.items():
                    if self.trainer.global_rank == 0:
                        for plot_idx, fig in enumerate(v):
                            self.logger[0].experiment.add_figure(
                                f'val_match_{valset_idx}/{k}/pair-{plot_idx}', fig, cur_epoch, close=True)
            plt.close('all')

        for thr in [5, 10, 20]:
            # log on all ranks for ModelCheckpoint callback to work properly
            self.log(f'auc@{thr}', torch.tensor(np.mean(multi_val_metrics[f'auc@{thr}'])))  # ckpt monitors on this

    def test_step(self, batch, batch_idx):
        with self.profiler.profile("XoFTR"):
            self.matcher(batch)

        ret_dict, rel_pair_names = self._compute_metrics(batch)

        with self.profiler.profile("dump_results"):
            if self.dump_dir is not None:
                # dump results for further analysis
                keys_to_save = {'mkpts0_f', 'mkpts1_f', 'mconf_f', 'epi_errs'}
                pair_names = list(zip(*batch['pair_names']))
                bs = batch['image0'].shape[0]
                dumps = []
                for b_id in range(bs):
                    item = {}
                    mask = batch['m_bids'] == b_id
                    item['pair_names'] = pair_names[b_id]
                    item['identifier'] = '#'.join(rel_pair_names[b_id])
                    if self.config.DATASET.TEST_DATA_SOURCE == "VisTir":
                        item['scene_id'] = batch['scene_id']
                        item['K0'] = batch['K0'][b_id].cpu().numpy()
                        item['K1'] = batch['K1'][b_id].cpu().numpy()
                        item['dist0'] = batch['dist0'][b_id].cpu().numpy()
                        item['dist1'] = batch['dist1'][b_id].cpu().numpy()
                    for key in keys_to_save:
                        item[key] = batch[key][mask].cpu().numpy()
                    for key in ['R_errs', 't_errs', 'inliers']:
                        item[key] = batch[key][b_id]
                    dumps.append(item)
                ret_dict['dumps'] = dumps

        return ret_dict

    def test_epoch_end(self, outputs):

        if self.config.DATASET.TEST_DATA_SOURCE == "VisTir":
            # metrics: dict of list, numpy
            metrics_per_scene = {}
            for o in outputs:
                if not o['metrics']['scene_id'][0] in metrics_per_scene.keys():
                    metrics_per_scene[o['metrics']['scene_id'][0]] = []
                metrics_per_scene[o['metrics']['scene_id'][0]].append(o['metrics'])

            aucs_per_scene = {}
            for scene_id in metrics_per_scene.keys():
                # 2. val metrics: dict of list, numpy
                _metrics = metrics_per_scene[scene_id]
                metrics = {k: flattenList(all_gather(flattenList([_me[k] for _me in _metrics]))) for k in _metrics[0]}
                # NOTE: all ranks need to `aggregate_metrics`, but only log at rank-0
                val_metrics = aggregate_metrics(metrics, self.config.TRAINER.EPI_ERR_THR)
                aucs_per_scene[scene_id] = val_metrics

            # average the metrics of scenes
            # since the number of images in each scene is different
            val_metrics_4tb = {}
            for thr in [5, 10, 20]:
                temp = []
                for scene_id in metrics_per_scene.keys():
                    temp.append(aucs_per_scene[scene_id][f'auc@{thr}'])
                val_metrics_4tb[f'auc@{thr}'] = np.array(temp, dtype=float).mean()
        else:
            # metrics: dict of list, numpy
            _metrics = [o['metrics'] for o in outputs]
            metrics = {k: flattenList(gather(flattenList([_me[k] for _me in _metrics]))) for k in _metrics[0]}

        # [{key: [{...}, *#bs]}, *#batch]
        if self.dump_dir is not None:
            Path(self.dump_dir).mkdir(parents=True, exist_ok=True)
            _dumps = flattenList([o['dumps'] for o in outputs])  # [{...}, #bs*#batch]
            dumps = flattenList(gather(_dumps))  # [{...}, #proc*#bs*#batch]
            logger.info(f'Prediction and evaluation results will be saved to: {self.dump_dir}')

        if self.trainer.global_rank == 0:
            print(self.profiler.summary())
            val_metrics_4tb = aggregate_metrics(metrics, self.config.TRAINER.EPI_ERR_THR)
            logger.info('\n' + pprint.pformat(val_metrics_4tb))
            if self.dump_dir is not None:
                np.save(Path(self.dump_dir) / 'XoFTR_pred_eval', dumps)
third_party/XoFTR/src/lightning/lightning_xoftr_pretrain.py
ADDED
@@ -0,0 +1,171 @@

from loguru import logger

import torch
import pytorch_lightning as pl
from matplotlib import pyplot as plt
plt.switch_backend('agg')

from src.xoftr import XoFTR_Pretrain
from src.losses.xoftr_loss_pretrain import XoFTRLossPretrain
from src.optimizers import build_optimizer, build_scheduler
from src.utils.plotting import make_mae_figures
from src.utils.comm import all_gather
from src.utils.misc import lower_config, flattenList
from src.utils.profiler import PassThroughProfiler
from src.utils.pretrain_utils import generate_random_masks, get_target


class PL_XoFTR_Pretrain(pl.LightningModule):
    def __init__(self, config, pretrained_ckpt=None, profiler=None, dump_dir=None):
        """
        TODO:
            - use the new version of PL logging API.
        """
        super().__init__()
        # Misc
        self.config = config  # full config

        _config = lower_config(self.config)
        self.xoftr_cfg = lower_config(_config['xoftr'])
        self.profiler = profiler or PassThroughProfiler()
        self.n_vals_plot = max(config.TRAINER.N_VAL_PAIRS_TO_PLOT // config.TRAINER.WORLD_SIZE, 1)

        # generator to create the same masks for validation
        self.val_seed = self.config.PRETRAIN.VAL_SEED
        self.val_generator = torch.Generator(device="cuda").manual_seed(self.val_seed)
        self.mae_margins = config.PRETRAIN.MAE_MARGINS

        # Matcher: XoFTR
        self.matcher = XoFTR_Pretrain(config=_config['xoftr'])
        self.loss = XoFTRLossPretrain(_config)

        # Pretrained weights
        if pretrained_ckpt:
            state_dict = torch.load(pretrained_ckpt, map_location='cpu')['state_dict']
            self.matcher.load_state_dict(state_dict, strict=False)
            logger.info(f"Load \'{pretrained_ckpt}\' as pretrained checkpoint")

        # Testing
        self.dump_dir = dump_dir

    def configure_optimizers(self):
        # FIXME: The scheduler did not work properly when `--resume_from_checkpoint`
        optimizer = build_optimizer(self, self.config)
        scheduler = build_scheduler(self.config, optimizer)
        return [optimizer], [scheduler]

    def optimizer_step(
            self, epoch, batch_idx, optimizer, optimizer_idx,
            optimizer_closure, on_tpu, using_native_amp, using_lbfgs):
        # learning rate warm up
        warmup_step = self.config.TRAINER.WARMUP_STEP
        if self.trainer.global_step < warmup_step:
            if self.config.TRAINER.WARMUP_TYPE == 'linear':
                base_lr = self.config.TRAINER.WARMUP_RATIO * self.config.TRAINER.TRUE_LR
                lr = base_lr + \
                    (self.trainer.global_step / self.config.TRAINER.WARMUP_STEP) * \
                    abs(self.config.TRAINER.TRUE_LR - base_lr)
                for pg in optimizer.param_groups:
                    pg['lr'] = lr
            elif self.config.TRAINER.WARMUP_TYPE == 'constant':
                pass
            else:
                raise ValueError(f'Unknown lr warm-up strategy: {self.config.TRAINER.WARMUP_TYPE}')

        # update params
        optimizer.step(closure=optimizer_closure)
        optimizer.zero_grad()

    def _trainval_inference(self, batch, generator=None):
        generate_random_masks(batch,
                              patch_size=self.config.PRETRAIN.PATCH_SIZE,
                              mask_ratio=self.config.PRETRAIN.MASK_RATIO,
                              generator=generator,
                              margins=self.mae_margins)

        with self.profiler.profile("XoFTR"):
            self.matcher(batch)

        with self.profiler.profile("Compute losses"):
            # Create target patches to reconstruct
            get_target(batch)
            self.loss(batch)

    def training_step(self, batch, batch_idx):
        self._trainval_inference(batch)

        # logging
        if self.trainer.global_rank == 0 and self.global_step % self.trainer.log_every_n_steps == 0:
            # scalars
            for k, v in batch['loss_scalars'].items():
                self.logger[0].experiment.add_scalar(f'train/{k}', v, self.global_step)
                if self.config.TRAINER.USE_WANDB:
                    self.logger[1].log_metrics({f'train/{k}': v}, self.global_step)

            if self.config.TRAINER.ENABLE_PLOTTING:
                figures = make_mae_figures(batch)
                for i, figure in enumerate(figures):
                    self.logger[0].experiment.add_figure(
                        f'train_mae/node_{self.trainer.global_rank}-device_{self.device.index}-batch_{i}',
                        figure, self.global_step)

        return {'loss': batch['loss']}

    def training_epoch_end(self, outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        if self.trainer.global_rank == 0:
            self.logger[0].experiment.add_scalar(
                'train/avg_loss_on_epoch', avg_loss,
                global_step=self.current_epoch)
            if self.config.TRAINER.USE_WANDB:
                self.logger[1].log_metrics(
                    {'train/avg_loss_on_epoch': avg_loss},
                    self.current_epoch)

    def validation_step(self, batch, batch_idx):
        self._trainval_inference(batch, self.val_generator)

        val_plot_interval = max(self.trainer.num_val_batches[0] // \
                                (self.trainer.num_gpus * self.n_vals_plot), 1)
        figures = []
        if batch_idx % val_plot_interval == 0:
            figures = make_mae_figures(batch)

        return {
            'loss_scalars': batch['loss_scalars'],
            'figures': figures,
        }

    def validation_epoch_end(self, outputs):
        self.val_generator.manual_seed(self.val_seed)
        # handle multiple validation sets
        multi_outputs = [outputs] if not isinstance(outputs[0], (list, tuple)) else outputs

        for valset_idx, outputs in enumerate(multi_outputs):
            # since pl performs sanity_check at the very beginning of the training
            cur_epoch = self.trainer.current_epoch
            if not self.trainer.resume_from_checkpoint and self.trainer.running_sanity_check:
                cur_epoch = -1

            # 1. loss_scalars: dict of list, on cpu
            _loss_scalars = [o['loss_scalars'] for o in outputs]
            loss_scalars = {k: flattenList(all_gather([_ls[k] for _ls in _loss_scalars])) for k in _loss_scalars[0]}

            _figures = [o['figures'] for o in outputs]
            figures = [item for sublist in _figures for item in sublist]

            # tensorboard records only on rank 0
            if self.trainer.global_rank == 0:
                for k, v in loss_scalars.items():
                    mean_v = torch.stack(v).mean()
                    self.logger[0].experiment.add_scalar(f'val_{valset_idx}/avg_{k}', mean_v, global_step=cur_epoch)
                    if self.config.TRAINER.USE_WANDB:
                        self.logger[1].log_metrics({f'val_{valset_idx}/avg_{k}': mean_v}, cur_epoch)

                for plot_idx, fig in enumerate(figures):
                    self.logger[0].experiment.add_figure(
                        f'val_mae_{valset_idx}/pair-{plot_idx}', fig, cur_epoch, close=True)

            plt.close('all')
third_party/XoFTR/src/losses/xoftr_loss.py
ADDED
@@ -0,0 +1,170 @@
from loguru import logger

import torch
import torch.nn as nn
from kornia.geometry.conversions import convert_points_to_homogeneous
from kornia.geometry.epipolar import numeric

class XoFTRLoss(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config  # config under the global namespace
        self.loss_config = config['xoftr']['loss']
        self.pos_w = self.loss_config['pos_weight']
        self.neg_w = self.loss_config['neg_weight']


    def compute_fine_matching_loss(self, data):
        """ Point-wise Focal Loss with 0 / 1 confidence as gt.
        Args:
            data (dict): {
                conf_matrix_fine (torch.Tensor): (N, W_f^2, W_f^2)
                conf_matrix_f_gt (torch.Tensor): (N, W_f^2, W_f^2)
                }
        """
        conf_matrix_fine = data['conf_matrix_fine']
        conf_matrix_f_gt = data['conf_matrix_f_gt']
        pos_mask, neg_mask = conf_matrix_f_gt > 0, conf_matrix_f_gt == 0
        pos_w, neg_w = self.pos_w, self.neg_w

        if not pos_mask.any():  # assign a wrong gt
            pos_mask[0, 0, 0] = True
            pos_w = 0.
        if not neg_mask.any():
            neg_mask[0, 0, 0] = True
            neg_w = 0.

        conf_matrix_fine = torch.clamp(conf_matrix_fine, 1e-6, 1-1e-6)
        alpha = self.loss_config['focal_alpha']
        gamma = self.loss_config['focal_gamma']

        loss_pos = - alpha * torch.pow(1 - conf_matrix_fine[pos_mask], gamma) * (conf_matrix_fine[pos_mask]).log()
        # loss_pos *= conf_matrix_f_gt[pos_mask]
        loss_neg = - alpha * torch.pow(conf_matrix_fine[neg_mask], gamma) * (1 - conf_matrix_fine[neg_mask]).log()

        return pos_w * loss_pos.mean() + neg_w * loss_neg.mean()

    def _symmetric_epipolar_distance(self, pts0, pts1, E, K0, K1):
        """Squared symmetric epipolar distance.
        This can be seen as a biased estimation of the reprojection error.
        Args:
            pts0 (torch.Tensor): [N, 2]
            E (torch.Tensor): [3, 3]
        """
        pts0 = (pts0 - K0[:, [0, 1], [2, 2]]) / K0[:, [0, 1], [0, 1]]
        pts1 = (pts1 - K1[:, [0, 1], [2, 2]]) / K1[:, [0, 1], [0, 1]]
        pts0 = convert_points_to_homogeneous(pts0)
        pts1 = convert_points_to_homogeneous(pts1)

        Ep0 = (pts0[:, None, :] @ E.transpose(-2, -1)).squeeze(1)  # [N, 3]
        p1Ep0 = torch.sum(pts1 * Ep0, -1)  # [N,]
        Etp1 = (pts1[:, None, :] @ E).squeeze(1)  # [N, 3]

        d = p1Ep0**2 * (1.0 / (Ep0[:, 0]**2 + Ep0[:, 1]**2 + 1e-9) + 1.0 / (Etp1[:, 0]**2 + Etp1[:, 1]**2 + 1e-9))  # N
        return d

    def compute_sub_pixel_loss(self, data):
        """ symmetric epipolar distance loss.
        Args:
            data (dict): {
                m_bids (torch.Tensor): (N)
                T_0to1 (torch.Tensor): (B, 4, 4)
                mkpts0_f_train (torch.Tensor): (N, 2)
                mkpts1_f_train (torch.Tensor): (N, 2)
                }
        """

        Tx = numeric.cross_product_matrix(data['T_0to1'][:, :3, 3])
        E_mat = Tx @ data['T_0to1'][:, :3, :3]

        m_bids = data['m_bids']
        pts0 = data['mkpts0_f_train']
        pts1 = data['mkpts1_f_train']

        sym_dist = self._symmetric_epipolar_distance(pts0, pts1, E_mat[m_bids], data['K0'][m_bids], data['K1'][m_bids])
        # filter matches with high epipolar error (only train approximately correct fine-level matches)
        loss = sym_dist[sym_dist < 1e-4]
        if len(loss) == 0:
            return torch.zeros(1, device=loss.device, requires_grad=False)[0]
        return loss.mean()

    def compute_coarse_loss(self, data, weight=None):
        """ Point-wise CE / Focal Loss with 0 / 1 confidence as gt.
        Args:
            data (dict): {
                conf_matrix_0_to_1 (torch.Tensor): (N, HW0, HW1)
                conf_matrix_1_to_0 (torch.Tensor): (N, HW0, HW1)
                conf_gt (torch.Tensor): (N, HW0, HW1)
                }
            weight (torch.Tensor): (N, HW0, HW1)
        """

        conf_matrix_0_to_1 = data["conf_matrix_0_to_1"]
        conf_matrix_1_to_0 = data["conf_matrix_1_to_0"]
        conf_gt = data["conf_matrix_gt"]

        pos_mask = conf_gt == 1
        c_pos_w = self.pos_w
        # corner case: no gt coarse-level match at all
        if not pos_mask.any():  # assign a wrong gt
            pos_mask[0, 0, 0] = True
            if weight is not None:
                weight[0, 0, 0] = 0.
            c_pos_w = 0.

        conf_matrix_0_to_1 = torch.clamp(conf_matrix_0_to_1, 1e-6, 1-1e-6)
        conf_matrix_1_to_0 = torch.clamp(conf_matrix_1_to_0, 1e-6, 1-1e-6)
        alpha = self.loss_config['focal_alpha']
        gamma = self.loss_config['focal_gamma']

        loss_pos = - alpha * torch.pow(1 - conf_matrix_0_to_1[pos_mask], gamma) * (conf_matrix_0_to_1[pos_mask]).log()
        loss_pos += - alpha * torch.pow(1 - conf_matrix_1_to_0[pos_mask], gamma) * (conf_matrix_1_to_0[pos_mask]).log()
        if weight is not None:
            loss_pos = loss_pos * weight[pos_mask]

        loss_c = (c_pos_w * loss_pos.mean())

        return loss_c

    @torch.no_grad()
    def compute_c_weight(self, data):
        """ compute element-wise weights for computing coarse-level loss. """
        if 'mask0' in data:
            c_weight = (data['mask0'].flatten(-2)[..., None] * data['mask1'].flatten(-2)[:, None]).float()
        else:
            c_weight = None
        return c_weight

    def forward(self, data):
        """
        Update:
            data (dict): update{
                'loss': [1] the reduced loss across a batch,
                'loss_scalars' (dict): loss scalars for tensorboard_record
            }
        """
        loss_scalars = {}
        # 0. compute element-wise loss weight
        c_weight = self.compute_c_weight(data)

        # 1. coarse-level loss
        loss_c = self.compute_coarse_loss(data, weight=c_weight)
        loss_c *= self.loss_config['coarse_weight']
        loss = loss_c
        loss_scalars.update({"loss_c": loss_c.clone().detach().cpu()})

        # 2. fine-level matching loss for windows
        loss_f_match = self.compute_fine_matching_loss(data)
        loss_f_match *= self.loss_config['fine_weight']
        loss = loss + loss_f_match
        loss_scalars.update({"loss_f": loss_f_match.clone().detach().cpu()})

        # 3. sub-pixel refinement loss
        loss_sub = self.compute_sub_pixel_loss(data)
        loss_sub *= self.loss_config['sub_weight']
        loss = loss + loss_sub
        loss_scalars.update({"loss_sub": loss_sub.clone().detach().cpu()})

        loss_scalars.update({'loss': loss.clone().detach().cpu()})
        data.update({"loss": loss, "loss_scalars": loss_scalars})
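For readers following the sub-pixel loss above: in the normalized coordinates computed by `_symmetric_epipolar_distance`, the per-match quantity it returns is the standard squared symmetric epipolar distance, which can be written (with $E$ the essential matrix and $\hat{p}_0, \hat{p}_1$ the homogeneous normalized keypoints)

$$ d(\hat{p}_0, \hat{p}_1) = (\hat{p}_1^\top E \hat{p}_0)^2 \left( \frac{1}{(E\hat{p}_0)_1^2 + (E\hat{p}_0)_2^2} + \frac{1}{(E^\top \hat{p}_1)_1^2 + (E^\top \hat{p}_1)_2^2} \right), $$

with the $10^{-9}$ terms in the code guarding against division by zero; only matches with $d < 10^{-4}$ contribute to the mean.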
third_party/XoFTR/src/losses/xoftr_loss_pretrain.py
ADDED
@@ -0,0 +1,37 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class XoFTRLossPretrain(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config  # config under the global namespace
        self.W_f = config["xoftr"]['fine_window_size']

    def forward(self, data):
        """
        Update:
            data (dict): update{
                'loss': [1] the reduced loss across a batch,
                'loss_scalars' (dict): loss scalars for tensorboard_record
            }
        """
        loss_scalars = {}

        pred0, pred1 = data["pred0"], data["pred1"]
        target0, target1 = data["target0"], data["target1"]
        target0 = target0[[data['b_ids'], data['i_ids']]]
        target1 = target1[[data['b_ids'], data['j_ids']]]

        # get correct indices
        pred0 = pred0[data["ids_image0"]]
        pred1 = pred1[data["ids_image1"]]
        target0 = target0[data["ids_image0"]]
        target1 = target1[data["ids_image1"]]

        loss0 = (pred0 - target0)**2
        loss1 = (pred1 - target1)**2
        loss = loss0.mean() + loss1.mean()

        loss_scalars.update({'loss': loss.clone().detach().cpu()})
        data.update({"loss": loss, "loss_scalars": loss_scalars})
third_party/XoFTR/src/optimizers/__init__.py
ADDED
@@ -0,0 +1,42 @@
import torch
from torch.optim.lr_scheduler import MultiStepLR, CosineAnnealingLR, ExponentialLR


def build_optimizer(model, config):
    name = config.TRAINER.OPTIMIZER
    lr = config.TRAINER.TRUE_LR

    if name == "adam":
        return torch.optim.Adam(model.parameters(), lr=lr, weight_decay=config.TRAINER.ADAM_DECAY)
    elif name == "adamw":
        return torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=config.TRAINER.ADAMW_DECAY)
    else:
        raise ValueError(f"TRAINER.OPTIMIZER = {name} is not a valid optimizer!")


def build_scheduler(config, optimizer):
    """
    Returns:
        scheduler (dict):{
            'scheduler': lr_scheduler,
            'interval': 'step',  # or 'epoch'
            'monitor': 'val_f1', (optional)
            'frequency': x, (optional)
        }
    """
    scheduler = {'interval': config.TRAINER.SCHEDULER_INTERVAL}
    name = config.TRAINER.SCHEDULER

    if name == 'MultiStepLR':
        scheduler.update(
            {'scheduler': MultiStepLR(optimizer, config.TRAINER.MSLR_MILESTONES, gamma=config.TRAINER.MSLR_GAMMA)})
    elif name == 'CosineAnnealing':
        scheduler.update(
            {'scheduler': CosineAnnealingLR(optimizer, config.TRAINER.COSA_TMAX)})
    elif name == 'ExponentialLR':
        scheduler.update(
            {'scheduler': ExponentialLR(optimizer, config.TRAINER.ELR_GAMMA)})
    else:
        raise NotImplementedError()

    return scheduler
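An illustrative sketch (not part of the diff) of what these two builders return for a toy model, assuming a yacs config populated with the TRAINER.* fields read above; the concrete values are placeholders.

# Sketch: build an optimizer and the Lightning-style scheduler dict for a stand-in model.
import torch
from yacs.config import CfgNode as CN
from src.optimizers import build_optimizer, build_scheduler

cfg = CN()
cfg.TRAINER = CN()
cfg.TRAINER.OPTIMIZER = "adamw"
cfg.TRAINER.TRUE_LR = 8e-3
cfg.TRAINER.ADAMW_DECAY = 0.1
cfg.TRAINER.SCHEDULER = "MultiStepLR"
cfg.TRAINER.SCHEDULER_INTERVAL = "epoch"
cfg.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12]
cfg.TRAINER.MSLR_GAMMA = 0.5

model = torch.nn.Linear(8, 2)                 # stand-in for PL_XoFTR
optimizer = build_optimizer(model, cfg)       # torch.optim.AdamW instance
scheduler = build_scheduler(cfg, optimizer)   # {'interval': 'epoch', 'scheduler': MultiStepLR(...)}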
third_party/XoFTR/src/utils/augment.py
ADDED
@@ -0,0 +1,113 @@
import albumentations as A
import numpy as np
import cv2

class DarkAug(object):
    """
    Extreme dark augmentation aiming at Aachen Day-Night
    """

    def __init__(self):
        self.augmentor = A.Compose([
            A.RandomBrightnessContrast(p=0.75, brightness_limit=(-0.6, 0.0), contrast_limit=(-0.5, 0.3)),
            A.Blur(p=0.1, blur_limit=(3, 9)),
            A.MotionBlur(p=0.2, blur_limit=(3, 25)),
            A.RandomGamma(p=0.1, gamma_limit=(15, 65)),
            A.HueSaturationValue(p=0.1, val_shift_limit=(-100, -40))
        ], p=0.75)

    def __call__(self, x):
        return self.augmentor(image=x)['image']


class MobileAug(object):
    """
    Random augmentations aiming at images of mobile/handheld devices.
    """

    def __init__(self):
        self.augmentor = A.Compose([
            A.MotionBlur(p=0.25),
            A.ColorJitter(p=0.5),
            A.RandomRain(p=0.1),  # random occlusion
            A.RandomSunFlare(p=0.1),
            A.JpegCompression(p=0.25),
            A.ISONoise(p=0.25)
        ], p=1.0)

    def __call__(self, x):
        return self.augmentor(image=x)['image']

class RGBThermalAug(object):
    """
    Pseudo-thermal image augmentation
    """

    def __init__(self):
        self.blur = A.Blur(p=0.7, blur_limit=(2, 4))
        self.hsv = A.HueSaturationValue(p=0.9, val_shift_limit=(-30, +30), hue_shift_limit=(-90, +90), sat_shift_limit=(-30, +30))

        # Switch images to apply augmentation
        self.random_switch = True

        # parameters for the cosine transform
        self.w_0 = np.pi * 2 / 3
        self.w_r = np.pi / 2
        self.theta_r = np.pi / 2

    def augment_pseudo_thermal(self, image):

        # HSV augmentation
        image = self.hsv(image=image)["image"]

        # Random blur
        image = self.blur(image=image)["image"]

        # Convert the image to gray scale
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Normalize the image between (-0.5, 0.5)
        image = image / 255 - 0.5  # 8 bit color

        # Random phase and freq for the cosine transform
        phase = np.pi / 2 + np.random.randn(1) * self.theta_r
        w = self.w_0 + np.abs(np.random.randn(1)) * self.w_r

        # Cosine transform
        image = np.cos(image * w + phase)

        # Min-max normalization for the transformed image
        image = (image - image.min()) / (image.max() - image.min()) * 255

        # 3 channel gray
        image = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_GRAY2RGB)

        return image

    def __call__(self, x, image_num):
        if image_num == 0:
            # augmentation for RGB image can be added here
            return x
        elif image_num == 1:
            # pseudo-thermal augmentation
            return self.augment_pseudo_thermal(x)
        else:
            raise ValueError(f'Invalid image number: {image_num}')


def build_augmentor(method=None, **kwargs):

    if method == 'dark':
        return DarkAug()
    elif method == 'mobile':
        return MobileAug()
    elif method == "rgb_thermal":
        return RGBThermalAug()
    elif method is None:
        return None
    else:
        raise ValueError(f'Invalid augmentation method: {method}')


if __name__ == '__main__':
    augmentor = build_augmentor('FDA')
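An illustrative sketch (not part of the diff) of applying the pseudo-thermal augmentation to one image of a visible pair, mirroring how the pretraining pipeline is expected to call it; the random image here is just a placeholder input.

# Sketch: image 0 stays visible RGB, image 1 is turned into a pseudo-thermal rendering.
import numpy as np
from src.utils.augment import build_augmentor

augmentor = build_augmentor("rgb_thermal")
rgb = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # placeholder RGB frame

img0 = augmentor(rgb, image_num=0)  # returned unchanged
img1 = augmentor(rgb, image_num=1)  # HSV jitter + blur + cosine intensity remapping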
third_party/XoFTR/src/utils/comm.py
ADDED
@@ -0,0 +1,265 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
[Copied from detectron2]
This file contains primitives for multi-gpu communication.
This is useful when doing distributed training.
"""

import functools
import logging
import numpy as np
import pickle
import torch
import torch.distributed as dist

_LOCAL_PROCESS_GROUP = None
"""
A torch process group which only includes processes that on the same machine as the current process.
This variable is set when processes are spawned by `launch()` in "engine/launch.py".
"""


def get_world_size() -> int:
    if not dist.is_available():
        return 1
    if not dist.is_initialized():
        return 1
    return dist.get_world_size()


def get_rank() -> int:
    if not dist.is_available():
        return 0
    if not dist.is_initialized():
        return 0
    return dist.get_rank()


def get_local_rank() -> int:
    """
    Returns:
        The rank of the current process within the local (per-machine) process group.
    """
    if not dist.is_available():
        return 0
    if not dist.is_initialized():
        return 0
    assert _LOCAL_PROCESS_GROUP is not None
    return dist.get_rank(group=_LOCAL_PROCESS_GROUP)


def get_local_size() -> int:
    """
    Returns:
        The size of the per-machine process group,
        i.e. the number of processes per machine.
    """
    if not dist.is_available():
        return 1
    if not dist.is_initialized():
        return 1
    return dist.get_world_size(group=_LOCAL_PROCESS_GROUP)


def is_main_process() -> bool:
    return get_rank() == 0


def synchronize():
    """
    Helper function to synchronize (barrier) among all processes when
    using distributed training
    """
    if not dist.is_available():
        return
    if not dist.is_initialized():
        return
    world_size = dist.get_world_size()
    if world_size == 1:
        return
    dist.barrier()


@functools.lru_cache()
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    """
    if dist.get_backend() == "nccl":
        return dist.new_group(backend="gloo")
    else:
        return dist.group.WORLD


def _serialize_to_tensor(data, group):
    backend = dist.get_backend(group)
    assert backend in ["gloo", "nccl"]
    device = torch.device("cpu" if backend == "gloo" else "cuda")

    buffer = pickle.dumps(data)
    if len(buffer) > 1024 ** 3:
        logger = logging.getLogger(__name__)
        logger.warning(
            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                get_rank(), len(buffer) / (1024 ** 3), device
            )
        )
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to(device=device)
    return tensor


def _pad_to_largest_tensor(tensor, group):
    """
    Returns:
        list[int]: size of the tensor, on each rank
        Tensor: padded tensor that has the max size
    """
    world_size = dist.get_world_size(group=group)
    assert (
        world_size >= 1
    ), "comm.gather/all_gather must be called from ranks within the given group!"
    local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
    size_list = [
        torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)
    ]
    dist.all_gather(size_list, local_size, group=group)

    size_list = [int(size.item()) for size in size_list]

    max_size = max(size_list)

    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    if local_size != max_size:
        padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device)
        tensor = torch.cat((tensor, padding), dim=0)
    return size_list, tensor


def all_gather(data, group=None):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        list[data]: list of data gathered from each rank
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group) == 1:
        return [data]

    tensor = _serialize_to_tensor(data, group)

    size_list, tensor = _pad_to_largest_tensor(tensor, group)
    max_size = max(size_list)

    # receiving Tensor from all ranks
    tensor_list = [
        torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list
    ]
    dist.all_gather(tensor_list, tensor, group=group)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))

    return data_list


def gather(data, dst=0, group=None):
    """
    Run gather on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
        dst (int): destination rank
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        list[data]: on dst, a list of data gathered from each rank. Otherwise,
            an empty list.
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group=group) == 1:
        return [data]
    rank = dist.get_rank(group=group)

    tensor = _serialize_to_tensor(data, group)
    size_list, tensor = _pad_to_largest_tensor(tensor, group)

    # receiving Tensor from all ranks
    if rank == dst:
        max_size = max(size_list)
        tensor_list = [
            torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list
        ]
        dist.gather(tensor, tensor_list, dst=dst, group=group)

        data_list = []
        for size, tensor in zip(size_list, tensor_list):
            buffer = tensor.cpu().numpy().tobytes()[:size]
            data_list.append(pickle.loads(buffer))
        return data_list
    else:
        dist.gather(tensor, [], dst=dst, group=group)
        return []


def shared_random_seed():
    """
    Returns:
        int: a random number that is the same across all workers.
            If workers need a shared RNG, they can use this shared seed to
            create one.

    All workers must call this function, otherwise it will deadlock.
    """
    ints = np.random.randint(2 ** 31)
    all_ints = all_gather(ints)
    return all_ints[0]


def reduce_dict(input_dict, average=True):
    """
    Reduce the values in the dictionary from all processes so that process with rank
    0 has the reduced results.

    Args:
        input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor.
        average (bool): whether to do average or sum

    Returns:
        a dict with the same keys as input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.reduce(values, dst=0)
        if dist.get_rank() == 0 and average:
            # only main process gets accumulated, so only divide by
            # world_size in this case
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict
third_party/XoFTR/src/utils/data_io.py
ADDED
@@ -0,0 +1,144 @@
import torch
from torch import nn
import numpy as np
import cv2
# import torchvision.transforms as transforms
import torch.nn.functional as F
from yacs.config import CfgNode as CN

def lower_config(yacs_cfg):
    if not isinstance(yacs_cfg, CN):
        return yacs_cfg
    return {k.lower(): lower_config(v) for k, v in yacs_cfg.items()}


def upper_config(dict_cfg):
    if not isinstance(dict_cfg, dict):
        return dict_cfg
    return {k.upper(): upper_config(v) for k, v in dict_cfg.items()}


class DataIOWrapper(nn.Module):
    """
    Pre-process data from different sources
    """

    def __init__(self, model, config, ckpt=None):
        super().__init__()

        self.device = torch.device('cuda:{}'.format(0) if torch.cuda.is_available() else 'cpu')
        torch.set_grad_enabled(False)
        self.model = model
        self.config = config
        self.img0_size = config['img0_resize']
        self.img1_size = config['img1_resize']
        self.df = config['df']
        self.padding = config['padding']
        self.coarse_scale = config['coarse_scale']

        if ckpt:
            ckpt_dict = torch.load(ckpt)
            self.model.load_state_dict(ckpt_dict['state_dict'])
            self.model = self.model.eval().to(self.device)

    def preprocess_image(self, img, device, resize=None, df=None, padding=None, cam_K=None, dist=None, gray_scale=True):
        # xoftr takes grayscale input images
        if gray_scale and len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        h, w = img.shape[:2]
        new_K = None
        img_undistorted = None
        if cam_K is not None and dist is not None:
            new_K, roi = cv2.getOptimalNewCameraMatrix(cam_K, dist, (w, h), 0, (w, h))
            img = cv2.undistort(img, cam_K, dist, None, new_K)
            img_undistorted = img.copy()

        if resize is not None:
            scale = resize / max(h, w)
            w_new, h_new = int(round(w*scale)), int(round(h*scale))
        else:
            w_new, h_new = w, h

        if df is not None:
            w_new, h_new = map(lambda x: int(x // df * df), [w_new, h_new])

        img = cv2.resize(img, (w_new, h_new))
        scale = np.array([w/w_new, h/h_new], dtype=float)
        if padding:  # padding
            pad_to = max(h_new, w_new)
            img, mask = self.pad_bottom_right(img, pad_to, ret_mask=True)
            mask = torch.from_numpy(mask).to(device)
        else:
            mask = None
        # img = transforms.functional.to_tensor(img).unsqueeze(0).to(device)
        if len(img.shape) == 2:  # grayscale image
            img = torch.from_numpy(img)[None][None].cuda().float() / 255.0
        else:  # Color image
            img = torch.from_numpy(img).permute(2, 0, 1)[None].float() / 255.0
        return img, scale, mask, new_K, img_undistorted

    def from_cv_imgs(self, img0, img1, K0=None, K1=None, dist0=None, dist1=None):
        img0_tensor, scale0, mask0, new_K0, img0_undistorted = self.preprocess_image(
            img0, self.device, resize=self.img0_size, df=self.df, padding=self.padding, cam_K=K0, dist=dist0)
        img1_tensor, scale1, mask1, new_K1, img1_undistorted = self.preprocess_image(
            img1, self.device, resize=self.img1_size, df=self.df, padding=self.padding, cam_K=K1, dist=dist1)
        mkpts0, mkpts1, mconf = self.match_images(img0_tensor, img1_tensor, mask0, mask1)
        mkpts0 = mkpts0 * scale0
        mkpts1 = mkpts1 * scale1
        matches = np.concatenate([mkpts0, mkpts1], axis=1)
        data = {'matches': matches,
                'mkpts0': mkpts0,
                'mkpts1': mkpts1,
                'mconf': mconf,
                'img0': img0,
                'img1': img1
                }
        if K0 is not None and dist0 is not None:
            data.update({'new_K0': new_K0, 'img0_undistorted': img0_undistorted})
        if K1 is not None and dist1 is not None:
            data.update({'new_K1': new_K1, 'img1_undistorted': img1_undistorted})
        return data

    def from_paths(self, img0_pth, img1_pth, K0=None, K1=None, dist0=None, dist1=None, read_color=False):

        imread_flag = cv2.IMREAD_COLOR if read_color else cv2.IMREAD_GRAYSCALE

        img0 = cv2.imread(img0_pth, imread_flag)
        img1 = cv2.imread(img1_pth, imread_flag)
        return self.from_cv_imgs(img0, img1, K0=K0, K1=K1, dist0=dist0, dist1=dist1)

    def match_images(self, image0, image1, mask0, mask1):
        batch = {'image0': image0, 'image1': image1}
        if mask0 is not None:  # img_padding is True
            if self.coarse_scale:
                [ts_mask_0, ts_mask_1] = F.interpolate(torch.stack([mask0, mask1], dim=0)[None].float(),
                                                       scale_factor=self.coarse_scale,
                                                       mode='nearest',
                                                       recompute_scale_factor=False)[0].bool()
            batch.update({'mask0': ts_mask_0.unsqueeze(0), 'mask1': ts_mask_1.unsqueeze(0)})
        self.model(batch)
        mkpts0 = batch['mkpts0_f'].cpu().numpy()
        mkpts1 = batch['mkpts1_f'].cpu().numpy()
        mconf = batch['mconf_f'].cpu().numpy()
        return mkpts0, mkpts1, mconf

    def pad_bottom_right(self, inp, pad_size, ret_mask=False):
        assert isinstance(pad_size, int) and pad_size >= max(inp.shape[-2:]), f"{pad_size} < {max(inp.shape[-2:])}"
        mask = None
        if inp.ndim == 2:
            padded = np.zeros((pad_size, pad_size), dtype=inp.dtype)
            padded[:inp.shape[0], :inp.shape[1]] = inp
            if ret_mask:
                mask = np.zeros((pad_size, pad_size), dtype=bool)
                mask[:inp.shape[0], :inp.shape[1]] = True
        elif inp.ndim == 3:
            padded = np.zeros((inp.shape[0], pad_size, pad_size), dtype=inp.dtype)
            padded[:, :inp.shape[1], :inp.shape[2]] = inp
            if ret_mask:
                mask = np.zeros((inp.shape[0], pad_size, pad_size), dtype=bool)
                mask[:, :inp.shape[1], :inp.shape[2]] = True
        else:
            raise NotImplementedError()
        return padded, mask
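An illustrative sketch (not part of the diff) of wrapping a matcher with DataIOWrapper to match two images from disk. The config keys mirror the attributes read in __init__ above; the checkpoint path, image paths, and the XoFTR constructor call are placeholders and depend on the rest of this commit.

# Sketch: run XoFTR on a visible/thermal pair through DataIOWrapper (assumptions noted above).
from src.utils.data_io import DataIOWrapper, lower_config
from src.config.default import get_cfg_defaults
from src.xoftr import XoFTR

cfg = lower_config(get_cfg_defaults())
io_cfg = {"img0_resize": 640, "img1_resize": 640, "df": 8,
          "padding": True, "coarse_scale": 0.125}

matcher = XoFTR(config=cfg["xoftr"])
wrapper = DataIOWrapper(matcher, io_cfg, ckpt="weights/xoftr.ckpt")  # hypothetical checkpoint path
result = wrapper.from_paths("vis.png", "tir.png")                    # hypothetical image paths
print(result["mkpts0"].shape, result["mconf"].shape)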