andreysher committed
Commit: d2d52b7
Parent(s): f7f2696

Initial commit

Files changed:
- .gitattributes +1 -0
- .gitmodules +4 -0
- common.py +84 -0
- deeplabv3_mobilenet_v3_large/deeplabv3_mobilenet_v3_large.pth +3 -0
- deeplabv3_mobilenet_v3_large/deeplabv3_mobilenet_v3_large_x2.pth +3 -0
- deeplabv3_mobilenet_v3_large/deeplabv3_mobilenet_v3_large_x4.pth +3 -0
- requirements.txt +2 -0
- test.py +165 -0
- vision +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+deeplabv3_mobilenet_v3_large filter=lfs diff=lfs merge=lfs -text
.gitmodules
ADDED
@@ -0,0 +1,4 @@
+[submodule "vision"]
+	path = vision
+	url = https://github.com/pytorch/vision
+	shallow = true
common.py
ADDED
@@ -0,0 +1,84 @@
+import os
+import sys
+import torch
+import torchvision
+from fvcore.nn import FlopCountAnalysis
+from torch import nn
+
+sys.path.append("vision/references/segmentation")
+from coco_utils import ConvertCocoPolysToMask
+from coco_utils import FilterAndRemapCocoCategories
+from coco_utils import _coco_remove_images_without_annotations
+from transforms import Compose
+from utils import ConfusionMatrix
+
+
+class NanSafeConfusionMatrix(ConfusionMatrix):
+    """Confusion matrix that replaces NaNs with zeros."""
+
+    def __init__(self, num_classes):
+        super().__init__(num_classes=num_classes)
+
+    def compute(self):
+        """Compute metrics based on the confusion matrix."""
+        confusion_matrix = self.mat.float()
+        acc_global = torch.nan_to_num(torch.diag(confusion_matrix).sum() / confusion_matrix.sum())
+        acc = torch.nan_to_num(torch.diag(confusion_matrix) / confusion_matrix.sum(1))
+        intersection_over_unions = torch.nan_to_num(
+            torch.diag(confusion_matrix)
+            / (confusion_matrix.sum(1) + confusion_matrix.sum(0) - torch.diag(confusion_matrix))
+        )
+        return acc_global, acc, intersection_over_unions
+
+
+def flops_calculation_function(model: nn.Module, input_sample: torch.Tensor) -> float:
+    """Calculate the number of FLOPs in millions."""
+    counter = FlopCountAnalysis(
+        model=model.eval(),
+        inputs=input_sample,
+    )
+    counter.unsupported_ops_warnings(False)
+    counter.uncalled_modules_warnings(False)
+
+    flops = counter.total() / input_sample.shape[0]
+
+    return flops / 1e6
+
+
+def get_coco(root, image_set, transforms, use_v2=False, use_orig=False):
+    """Get the COCO dataset with VOC or COCO classes."""
+    paths = {
+        "train": ("train2017", os.path.join("annotations", "instances_train2017.json")),
+        "val": ("val2017", os.path.join("annotations", "instances_val2017.json")),
+        # "train": ("val2017", os.path.join("annotations", "instances_val2017.json"))
+    }
+    if use_orig:
+        classes_list = list(range(81))
+    else:
+        classes_list = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72]
+
+    img_folder, ann_file = paths[image_set]
+    img_folder = os.path.join(root, img_folder)
+    ann_file = os.path.join(root, ann_file)
+
+    # The two "Compose" branches below achieve the same thing: converting COCO detection
+    # samples into segmentation-compatible samples. They just do it with
+    # slightly different implementations. We could refactor and unify them, but
+    # keeping them separate helps keep the v2 version clean.
+    if use_v2:
+        import v2_extras  # pylint: disable=import-outside-toplevel
+        from torchvision.datasets import wrap_dataset_for_transforms_v2  # pylint: disable=import-outside-toplevel
+
+        transforms = Compose([v2_extras.CocoDetectionToVOCSegmentation(), transforms])
+        dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)
+        dataset = wrap_dataset_for_transforms_v2(dataset, target_keys={"masks", "labels"})
+    else:
+        transforms = Compose(
+            [FilterAndRemapCocoCategories(classes_list, remap=True), ConvertCocoPolysToMask(), transforms]
+        )
+        dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)
+
+    if image_set == "train":
+        dataset = _coco_remove_images_without_annotations(dataset, classes_list)
+
+    return dataset
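A minimal usage sketch for the helpers defined above, assuming the vision submodule is checked out (common.py imports the segmentation reference utilities at import time); the model constructor, input resolution, and tensor sizes here are illustrative assumptions, not part of this commit:

    import torch
    from torchvision.models.segmentation import deeplabv3_mobilenet_v3_large

    from common import NanSafeConfusionMatrix, flops_calculation_function

    # Hypothetical model and input size, just to exercise the FLOPs counter.
    model = deeplabv3_mobilenet_v3_large(num_classes=81)
    sample = torch.randn(1, 3, 520, 520)
    print(f"{flops_calculation_function(model=model, input_sample=sample):.1f} MFLOPs")

    # The confusion matrix consumes flattened ground-truth and predicted class ids.
    confmat = NanSafeConfusionMatrix(num_classes=81)
    confmat.update(torch.zeros(100, dtype=torch.int64), torch.zeros(100, dtype=torch.int64))
    acc_global, acc, ious = confmat.compute()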
deeplabv3_mobilenet_v3_large/deeplabv3_mobilenet_v3_large.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:366cfdd55f38a53aefe374c7f529cd05af2e4ba2c90848c202976376ff5e8c09
+size 88767468
deeplabv3_mobilenet_v3_large/deeplabv3_mobilenet_v3_large_x2.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb61548f8b66ead5a95b55ff41fa7db201fbf8340fab916f91fdac151f61d30e
+size 48772992
deeplabv3_mobilenet_v3_large/deeplabv3_mobilenet_v3_large_x4.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:173a1084b1ac46643d5fb1a0f8c91a73b4ba790c25d9e2130e7b050cd23c9b22
+size 27865280
requirements.txt
ADDED
@@ -0,0 +1,2 @@
+torch==2.3.1
+torchvision==0.18.1
test.py
ADDED
@@ -0,0 +1,165 @@
+import sys
+from functools import partial
+
+from typing import Callable
+from typing import Dict
+from typing import Tuple
+from typing import Union
+from argparse import Namespace
+
+sys.path.append("vision/references/segmentation")
+
+import presets
+import torch
+import torch.utils.data
+import torchvision
+import utils
+from torch import nn
+from common import flops_calculation_function
+from common import NanSafeConfusionMatrix as ConfusionMatrix
+from common import get_coco
+
+
+def get_dataset(args: Namespace, is_train: bool, transform: Callable = None) -> Tuple[torch.utils.data.Dataset, int]:
+    def sbd(*args, **kwargs):
+        kwargs.pop("use_v2")
+        return torchvision.datasets.SBDataset(*args, mode="segmentation", **kwargs)
+
+    def voc(*args, **kwargs):
+        kwargs.pop("use_v2")
+        return torchvision.datasets.VOCSegmentation(*args, **kwargs)
+
+    paths = {
+        "voc": (args.data_path, voc, 21),
+        "voc_aug": (args.data_path, sbd, 21),
+        "coco": (args.data_path, get_coco, 21),
+        "coco_orig": (args.data_path, partial(get_coco, use_orig=True), 81)
+    }
+    p, ds_fn, num_classes = paths["coco_orig"]
+
+    if transform is None:
+        transform = get_transform(is_train, args)
+    image_set = "train" if is_train else "val"
+    ds = ds_fn(p, image_set=image_set, transforms=transform, use_v2=args.use_v2)
+    return ds, num_classes
+
+
+def get_transform(is_train: bool, args: Namespace) -> Callable:
+    return presets.SegmentationPresetEval(base_size=520, backend=args.backend, use_v2=args.use_v2)
+
+
+def criterion(inputs: Dict[str, torch.Tensor], target: torch.Tensor) -> torch.Tensor:
+    losses = {}
+    for name, x in inputs.items():
+        losses[name] = nn.functional.cross_entropy(x, target, ignore_index=255)
+
+    if len(losses) == 1:
+        return losses["out"]
+
+    return losses["out"] + 0.5 * losses["aux"]
+
+
+def evaluate(
+    model: torch.nn.Module,
+    data_loader: torch.utils.data.DataLoader,
+    device: Union[str, torch.device],
+    num_classes: int,
+    criterion: Callable,
+) -> Tuple[ConfusionMatrix, float]:
+    model.eval()
+    confmat = ConfusionMatrix(num_classes)
+    metric_logger = utils.MetricLogger(delimiter=" ")
+    header = "Test:"
+    num_processed_samples = 0
+    with torch.inference_mode():
+        for batch_n, (image, target) in enumerate(metric_logger.log_every(data_loader, 100, header)):
+            image, target = image.to(device), target.to(device)
+            output = model(image)
+            loss = criterion(output, target)
+            output = output["out"]
+
+            confmat.update(target.flatten(), output.argmax(1).flatten())
+            # FIXME need to take into account that the datasets
+            # could have been padded in distributed setup
+            num_processed_samples += image.shape[0]
+
+            metric_logger.update(loss=loss.item())
+
+    confmat.reduce_from_all_processes()
+
+    return confmat, metric_logger.loss.global_avg
+
+
+def main(args):
+    if args.backend.lower() != "pil" and not args.use_v2:
+        # TODO: Support tensor backend in V1?
+        raise ValueError("Use --use-v2 if you want to use the tv_tensor or tensor backend.")
+    if args.use_v2:
+        raise ValueError("v2 is only supported for coco dataset for now.")
+
+    print(args)
+
+    device = torch.device(args.device)
+
+    if args.use_deterministic_algorithms:
+        torch.backends.cudnn.benchmark = False
+        torch.use_deterministic_algorithms(True)
+    else:
+        torch.backends.cudnn.benchmark = True
+
+    dataset_test, num_classes = get_dataset(args, is_train=False)
+
+    test_sampler = torch.utils.data.SequentialSampler(dataset_test)
+
+    data_loader_test = torch.utils.data.DataLoader(
+        dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=utils.collate_fn
+    )
+
+    checkpoint = torch.load(args.model_path)
+    model = checkpoint["model"]
+    model.to(device)
+    model_flops = flops_calculation_function(model=model, input_sample=next(iter(data_loader_test))[0].to(device))
+    print(f"Model Flops: {model_flops}M")
+
+    # We disable the cudnn benchmarking because it can noticeably affect the accuracy
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+    confmat, loss = evaluate(
+        model=model,
+        data_loader=data_loader_test,
+        device=device,
+        num_classes=num_classes,
+        criterion=criterion,
+    )
+    print(confmat)
+    return
+
+def get_args_parser(add_help=True):
+    import argparse
+
+    parser = argparse.ArgumentParser(description="PyTorch Segmentation Training", add_help=add_help)
+
+    parser.add_argument("--data-path", default="/datasets01/COCO/022719/", type=str, help="dataset path")
+    parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)")
+    parser.add_argument(
+        "-b", "--batch-size", default=8, type=int, help="images per gpu, the total batch size is $NGPU x batch_size"
+    )
+    parser.add_argument("--epochs", default=30, type=int, metavar="N", help="number of total epochs to run")
+
+    parser.add_argument(
+        "-j", "--workers", default=16, type=int, metavar="N", help="number of data loading workers (default: 16)"
+    )
+    parser.add_argument(
+        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
+    )
+    # distributed training parameters
+
+    parser.add_argument("--backend", default="PIL", type=str.lower, help="PIL or tensor - case insensitive")
+    parser.add_argument("--use-v2", action="store_true", help="Use V2 transforms")
+    parser.add_argument("--model-path", default=None, help="Path to model checkpoint.")
+    return parser
+
+
+if __name__ == "__main__":
+    args = get_args_parser().parse_args()
+    main(args)
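For completeness, a sketch of how this evaluation entry point might be driven programmatically instead of from the shell; the COCO root below is a placeholder path, and the checkpoint is expected to hold the full model object under the "model" key, since test.py reads it via checkpoint["model"]:

    from test import get_args_parser, main

    # Placeholder paths; --data-path must point at a COCO layout with
    # val2017/ images and annotations/instances_val2017.json.
    args = get_args_parser().parse_args(
        [
            "--data-path", "/path/to/coco",
            "--model-path", "deeplabv3_mobilenet_v3_large/deeplabv3_mobilenet_v3_large.pth",
            "--device", "cuda",
        ]
    )
    main(args)

The equivalent shell invocation would be python test.py --data-path /path/to/coco --model-path deeplabv3_mobilenet_v3_large/deeplabv3_mobilenet_v3_large.pth.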
vision
ADDED
@@ -0,0 +1 @@
+Subproject commit 126fc22ce33e6c2426edcf9ed540810c178fe9ce