|
import math |
|
|
|
import torch |
|
from torch import nn |
|
|
|
|
|
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding for non-recurrent neural networks.

    Implementation based on "Attention Is All You Need":

        PE(pos, 2i)     = sin(pos / 10000^(2i / channels))
        PE(pos, 2i + 1) = cos(pos / 10000^(2i / channels))

    The table is computed once for ``max_len`` positions and stored in the
    buffer ``pe`` with shape ``[1, channels, max_len]``.

    NOTE(review): ``pe`` is registered with ``register_buffer`` using the
    default arguments, so it is expected to be part of the ``state_dict``;
    a checkpoint saved with a differently computed table would overwrite
    this one on load -- confirm against the checkpoints in use.

    Args:
        channels (int): embedding size. Must be even.
        dropout_p (float): dropout rate applied to the output. Dropout is
            only instantiated when this is greater than zero.
        max_len (int): maximum sequence length.
        use_scale (bool): whether to use a learnable scaling coefficient.

    Raises:
        ValueError: if ``channels`` is odd.
    """

    def __init__(self, channels: int, dropout_p: float = 0.0, max_len: int = 5000, use_scale: bool = False):
        super().__init__()
        if channels % 2 != 0:
            raise ValueError(
                f"Cannot use sin/cos positional encoding with odd channels (got channels={channels:d})"
            )
        self.channels = channels
        self.max_len = max_len
        self.use_scale = use_scale
        if use_scale:
            # Single learnable gain applied to the encoding before it is added.
            self.scale = torch.nn.Parameter(torch.ones(1))

        # position: [max_len, 1], div_term: [channels / 2] -> angles: [max_len, channels / 2]
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.pow(10000, torch.arange(0, channels, 2).float() / channels)
        # The wavelengths must grow with the channel index, i.e. the position
        # is *divided* by 10000^(2i / channels). Multiplying instead yields
        # angular steps of up to 10000 rad per position, which alias and
        # exceed float32 precision for large positions.
        angles = position / div_term

        pe = torch.zeros(max_len, channels)
        pe[:, 0::2] = torch.sin(angles)  # even channels
        pe[:, 1::2] = torch.cos(angles)  # odd channels
        # [max_len, channels] -> [1, channels, max_len] to match x: [B, C, T].
        pe = pe.unsqueeze(0).transpose(1, 2)
        self.register_buffer("pe", pe)

        if dropout_p > 0:
            self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x, mask=None, first_idx=None, last_idx=None):
        """Scale ``x`` by ``sqrt(channels)`` and add the positional encoding.

        When ``first_idx`` is ``None`` the first ``T`` positions of the table
        are used and, if ``mask`` is given, the encoding is multiplied by it
        before being added. When ``first_idx`` is given, the slice
        ``pe[:, :, first_idx:last_idx]`` is added as-is: ``mask`` is not
        applied and no length check is performed in that mode.

        Shapes:
            x: [B, C, T]
            mask: [B, 1, T]
            first_idx: int
            last_idx: int

        Raises:
            RuntimeError: if ``first_idx`` is ``None`` and ``T`` exceeds the
                length of the precomputed table.
        """
        x = x * math.sqrt(self.channels)

        if first_idx is None:
            seq_len = x.size(2)
            if self.pe.size(2) < seq_len:
                raise RuntimeError(
                    f"Sequence is {x.size(2)} but PositionalEncoding is"
                    f" limited to {self.pe.size(2)}. See max_len argument."
                )
            pos_enc = self.pe[:, :, :seq_len]
            if mask is not None:
                pos_enc = pos_enc * mask
        else:
            # Windowed mode: the caller selects the positions explicitly.
            pos_enc = self.pe[:, :, first_idx:last_idx]

        if self.use_scale:
            pos_enc = self.scale * pos_enc
        x = x + pos_enc

        # ``dropout`` only exists when the module was built with dropout_p > 0.
        if hasattr(self, "dropout"):
            x = self.dropout(x)
        return x
|
|