import os
import typing

import torch
import torch.distributed as dist
from torch.nn.parallel import DataParallel
from torch.nn.parallel import DistributedDataParallel

from ..data.datasets import ResumableDistributedSampler as DistributedSampler
from ..data.datasets import ResumableSequentialSampler as SequentialSampler


class Accelerator:  # pragma: no cover
    """This class is used to prepare models and dataloaders for
    usage with DDP or DP. Use the functions prepare_model, prepare_dataloader to
    prepare the respective objects. In the case of models, they are moved to
    the appropriate GPU and SyncBatchNorm is applied to them. In the case of
    dataloaders, a sampler is created and the dataloader is initialized with
    that sampler.

    If the world size is 1, prepare_model and prepare_dataloader are
    no-ops. If the environment variable ``LOCAL_RANK`` is not set, then the
    script was launched without ``torchrun``, and ``DataParallel``
    will be used instead of ``DistributedDataParallel`` (not recommended), if
    the world size (number of GPUs) is greater than 1.

    Parameters
    ----------
    amp : bool, optional
        Whether or not to enable automatic mixed precision, by default False
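
    Examples
    --------
    A minimal training-loop sketch; ``my_model``, ``my_dataset``, and
    ``my_optimizer`` are placeholders for objects defined elsewhere::

        accel = Accelerator(amp=True)
        with accel:
            model = accel.prepare_model(my_model)
            loader = accel.prepare_dataloader(my_dataset, batch_size=16)
            for batch in loader:
                my_optimizer.zero_grad()
                with accel.autocast():
                    loss = model(batch).mean()
                accel.backward(loss)
                accel.step(my_optimizer)
                accel.update()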
    """

    def __init__(self, amp: bool = False):
        local_rank = os.getenv("LOCAL_RANK", None)
        self.world_size = torch.cuda.device_count()

        self.use_ddp = self.world_size > 1 and local_rank is not None
        self.use_dp = self.world_size > 1 and local_rank is None
        self.device = "cpu" if self.world_size == 0 else "cuda"

        if self.use_ddp:
            local_rank = int(local_rank)
            dist.init_process_group(
                "nccl",
                init_method="env://",
                world_size=self.world_size,
                rank=local_rank,
            )

        self.local_rank = 0 if local_rank is None else local_rank
        self.amp = amp

        class DummyScaler:
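            """No-op stand-in for ``torch.cuda.amp.GradScaler`` when AMP is disabled."""
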
            def __init__(self):
                pass

            def step(self, optimizer):
                optimizer.step()

            def scale(self, loss):
                return loss

            def unscale_(self, optimizer):
                return optimizer

            def update(self):
                pass

        self.scaler = torch.cuda.amp.GradScaler() if amp else DummyScaler()
        self.device_ctx = (
            torch.cuda.device(self.local_rank) if torch.cuda.is_available() else None
        )

    def __enter__(self):
        if self.device_ctx is not None:
            self.device_ctx.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if self.device_ctx is not None:
            self.device_ctx.__exit__(exc_type, exc_value, traceback)

    def prepare_model(self, model: torch.nn.Module, **kwargs):
        """Prepares model for DDP or DP. The model is moved to
        the device of the correct rank.

        Parameters
        ----------
        model : torch.nn.Module
            Model that is converted for DDP or DP.

        Returns
        -------
        torch.nn.Module
            Wrapped model, or original model if DDP and DP are turned off.
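
        Examples
        --------
        A small sketch; ``my_model`` is a placeholder, and extra keyword
        arguments (here a DDP-only option) are forwarded to the wrapper::

            model = accel.prepare_model(my_model, find_unused_parameters=True)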
        """
        model = model.to(self.device)
        if self.use_ddp:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
            model = DistributedDataParallel(
                model, device_ids=[self.local_rank], **kwargs
            )
        elif self.use_dp:
            model = DataParallel(model, **kwargs)
        return model

    # Automatic mixed-precision utilities
    def autocast(self, *args, **kwargs):
        """Context manager for autocasting. Arguments
        go to ``torch.cuda.amp.autocast``.
        """
        return torch.cuda.amp.autocast(self.amp, *args, **kwargs)

    def backward(self, loss: torch.Tensor):
        """Backwards pass, after scaling the loss if ``amp`` is
        enabled.

        Parameters
        ----------
        loss : torch.Tensor
            Loss value.
        """
        self.scaler.scale(loss).backward()

    def step(self, optimizer: torch.optim.Optimizer):
        """Steps the optimizer, using a ``scaler`` if ``amp`` is
        enabled.

        Parameters
        ----------
        optimizer : torch.optim.Optimizer
            Optimizer to step forward.
        """
        self.scaler.step(optimizer)

    def update(self):
        """Updates the scale factor."""
        self.scaler.update()

    def prepare_dataloader(
        self,
        dataset: typing.Iterable,
        start_idx: typing.Optional[int] = None,
        **kwargs,
    ):
        """Wraps a dataset with a DataLoader, using the correct sampler if DDP is
        enabled.

        Parameters
        ----------
        dataset : typing.Iterable
            Dataset to build Dataloader around.
        start_idx : int, optional
            Start index of sampler, useful if resuming from some epoch,
            by default None

        Returns
        -------
        torch.utils.data.DataLoader
            DataLoader wrapping ``dataset``, using a resumable distributed
            sampler if DDP is enabled and a resumable sequential sampler
            otherwise.
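
        Examples
        --------
        A sketch of resuming partway through an epoch; ``my_dataset`` and
        ``samples_seen`` are placeholders. Note that under DDP this method
        divides ``batch_size`` and ``num_workers`` across ranks::

            loader = accel.prepare_dataloader(
                my_dataset, start_idx=samples_seen, batch_size=16, num_workers=4
            )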
        """

        if self.use_ddp:
            sampler = DistributedSampler(
                dataset,
                start_idx,
                num_replicas=self.world_size,
                rank=self.local_rank,
            )
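            # Split the global batch size and worker count evenly across ranks.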
            if "num_workers" in kwargs:
                kwargs["num_workers"] = max(kwargs["num_workers"] // self.world_size, 1)
            kwargs["batch_size"] = max(kwargs["batch_size"] // self.world_size, 1)
        else:
            sampler = SequentialSampler(dataset, start_idx)

        dataloader = torch.utils.data.DataLoader(dataset, sampler=sampler, **kwargs)
        return dataloader

    @staticmethod
    def unwrap(model):
        """Unwraps the model if it was wrapped in DDP or DP, otherwise
        just returns the model. Use this to unwrap the model returned by
        :py:func:`audiotools.ml.accelerator.Accelerator.prepare_model`.
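
        Examples
        --------
        A sketch of saving weights without the DDP/DP ``module.`` prefix;
        ``wrapped_model`` is a placeholder::

            state = Accelerator.unwrap(wrapped_model).state_dict()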
        """
        if hasattr(model, "module"):
            return model.module
        return model